From 81af4c2f9af58294a9ea48745461730ad799f0e5 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 9 May 2024 11:33:21 +0100 Subject: [PATCH 1/9] Added all-in-one command to Makefile --- Makefile | 117 ++++++++++++++++++ contrib/configuration/default-dsc.yaml | 34 +++++ .../nfd-operator-subscription.yaml | 22 ++++ .../nvidia-operator-subscription.yaml | 22 ++++ .../opendatahub-operator-subscription.yaml | 13 ++ .../rhoai-operator-subscription.yaml | 13 ++ 6 files changed, 221 insertions(+) create mode 100644 contrib/configuration/default-dsc.yaml create mode 100644 contrib/configuration/nfd-operator-subscription.yaml create mode 100644 contrib/configuration/nvidia-operator-subscription.yaml create mode 100644 contrib/configuration/opendatahub-operator-subscription.yaml create mode 100644 contrib/configuration/rhoai-operator-subscription.yaml diff --git a/Makefile b/Makefile index b2673c4f8..690f12493 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,7 @@ BUNDLE_VERSION ?= $(VERSION:v%=%) # KUEUE_VERSION defines the default version of Kueue (used for testing) KUEUE_VERSION ?= v0.6.2 +USE_RHOAI ?= true # KUBERAY_VERSION defines the default version of the KubeRay operator (used for testing) KUBERAY_VERSION ?= v1.1.0 @@ -419,3 +420,119 @@ image-mnist-job-test-push: image-mnist-job-test-build ## Push container image wi .PHONY: kueue-setup kueue-setup: bash scripts/setup-kueue-resources.sh +# RHOAI/ODH related resources installation + +##@ all-in-one +.PHONY: all-in-one +all-in-one: + @echo -e "\n ==> Installing Everything needed for distributed AI platform on OpenShift cluster \n" + -make delete-nfd-operator + -make delete-ai-platform-operator + -make delete-nvidia-operator + -make install-ai-platform-operator + -make install-nfd-operator + -make install-nvidia-operator + +.PHONY: delete-all-in-one +delete-all-in-one: + @echo -e "\n ==> Removing Everything needed for distributed AI platform on OpenShift cluster \n" + -make delete-rhoai + -make delete-nfd-operator + 
-make delete-nvidia-operator + -make delete-ai-platform-operator + +##@ general +.PHONY: delete-ai-platform-operator +delete-ai-platform-operator: +ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator + -make delete-rhoai-operator +else ## Delete Open Data Hub Operator + -make delete-opendatahub-operator +endif + +.PHONY: install-ai-platform-operator +install-ai-platform-operator: +ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator + -make install-rhoai-operator +else ## Delete Open Data Hub Operator + -make install-opendatahub-operator +endif + +.PHONY: delete-rhoai-operator +delete-rhoai-operator: ## Delete RHOAI Operator + @echo -e "\n==> Deleting OpenShift AI Operator \n" + -oc delete subscription rhods-operator -n redhat-ods-operator + -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name`; \ + oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator + +.PHONY: install-rhoai-operator +install-rhoai-operator: ## Install RHOAI Operator + @echo -e "\n==> Installing OpenShift AI Operator \n" + -oc create ns redhat-ods-operator + oc create -f contrib/configuration/rhoai-operator-subscription.yaml + @echo Waiting for rhoai-operator Subscription to be ready + oc wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + @echo -e "\n==> Creating default Data Science Cluster \n" + oc apply -f contrib/configuration/default-dsc.yaml + +.PHONY: delete-opendatahub-operator +delete-opendatahub-operator: ## Delete OpenDataHub operator + @echo -e "\n==> Deleting OpenDataHub Operator \n" + -oc delete subscription opendatahub-operator -n openshift-operators + -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators -o custom-columns=:metadata.name`; \ + oc delete clusterserviceversion 
$$CLUSTER_SERVICE_VERSION -n openshift-operators + +.PHONY: install-opendatahub-operator +install-opendatahub-operator: ## Install OpenDataHub operator + @echo -e "\n==> Installing OpenDataHub Operator \n" + -oc create ns opendatahub + oc create -f contrib/configuration/opendatahub-operator-subscription.yaml + @echo Waiting for opendatahub-operator Subscription to be ready + oc wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + +##@ GPU Support +.PHONY: install-nfd-operator +install-nfd-operator: ## Install NFD operator ( Node Feature Discovery ) + @echo -e "\n==> Installing NFD Operator \n" + -oc create ns openshift-nfd + oc create -f contrib/configuration/nfd-operator-subscription.yaml + @echo -e "\n==> Creating default NodeFeatureDiscovery CR \n" + @while [[ -z $$(oc get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io) ]]; do echo "."; sleep 10; done + @while [[ -z $$(oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd) ]]; do echo "."; sleep 10; done + oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | oc apply -f - + +.PHONY: delete-nfd-operator +delete-nfd-operator: ## Delete NFD operator + @echo -e "\n==> Deleting NodeFeatureDiscovery CR \n" + oc delete NodeFeatureDiscovery --all -n openshift-nfd + @while [[ -n $$(oc get NodeFeatureDiscovery -n openshift-nfd) ]]; do echo "."; sleep 10; done + @echo -e "\n==> Deleting NFD Operator \n" + -oc delete subscription nfd -n openshift-nfd + -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n openshift-nfd -l operators.coreos.com/nfd.openshift-nfd -o custom-columns=:metadata.name`; \ + oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-nfd + -oc delete ns openshift-nfd + +.PHONY: install-nvidia-operator +install-nvidia-operator: ## 
Install nvidia operator + @echo -e "\n==> Installing nvidia Operator \n" + -oc create ns nvidia-gpu-operator + oc create -f contrib/configuration/nvidia-operator-subscription.yaml + @echo -e "\n==> Creating default ClusterPolicy CR \n" + @while [[ -z $$(oc get customresourcedefinition clusterpolicies.nvidia.com) ]]; do echo "."; sleep 10; done + @while [[ -z $$(oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator) ]]; do echo "."; sleep 10; done + oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq .[] | oc apply -f - +#ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator +# oc delete configmap migration-gpu-status -n redhat-ods-applications +# -export REPLICASET_NAME=`oc get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name` +# oc delete replicaset $$REPLICASET_NAME -n redhat-ods-applications +#endif +.PHONY: delete-nvidia-operator +delete-nvidia-operator: ## Delete nvidia operator + @echo -e "\n==> Deleting ClusterPolicy CR \n" + oc delete ClusterPolicy --all -n nvidia-gpu-operator + @while [[ -n $$(oc get ClusterPolicy -n nvidia-gpu-operator) ]]; do echo "."; sleep 10; done + @echo -e "\n==> Deleting nvidia Operator \n" + -oc delete subscription gpu-operator-certified -n nvidia-gpu-operator + -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n nvidia-gpu-operator -l operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -o custom-columns=:metadata.name`; \ + oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n nvidia-gpu-operator + -oc delete ns nvidia-gpu-operator diff --git a/contrib/configuration/default-dsc.yaml b/contrib/configuration/default-dsc.yaml new file mode 100644 index 000000000..718d87b07 --- /dev/null +++ b/contrib/configuration/default-dsc.yaml @@ -0,0 +1,34 @@ +kind: DataScienceCluster +apiVersion: 
datasciencecluster.opendatahub.io/v1 +metadata: + labels: + app.kubernetes.io/created-by: rhods-operator + app.kubernetes.io/instance: default-dsc + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: datasciencecluster + app.kubernetes.io/part-of: rhods-operator + name: default-dsc +spec: + components: + codeflare: + managementState: Managed + dashboard: + managementState: Managed + datasciencepipelines: + managementState: Managed + kserve: + managementState: Managed + serving: + ingressGateway: + certificate: + type: SelfSigned + managementState: Managed + name: knative-serving + kueue: + managementState: Managed + modelmeshserving: + managementState: Managed + ray: + managementState: Managed + workbenches: + managementState: Managed diff --git a/contrib/configuration/nfd-operator-subscription.yaml b/contrib/configuration/nfd-operator-subscription.yaml new file mode 100644 index 000000000..5653ac3a9 --- /dev/null +++ b/contrib/configuration/nfd-operator-subscription.yaml @@ -0,0 +1,22 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: nfd + namespace: openshift-nfd +spec: + targetNamespaces: + - openshift-nfd +--- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: nfd + labels: + operators.coreos.com/nfd.openshift-nfd: '' + namespace: openshift-nfd +spec: + channel: stable + name: nfd + installPlanApproval: Automatic + source: redhat-operators + sourceNamespace: openshift-marketplace diff --git a/contrib/configuration/nvidia-operator-subscription.yaml b/contrib/configuration/nvidia-operator-subscription.yaml new file mode 100644 index 000000000..3fa10297c --- /dev/null +++ b/contrib/configuration/nvidia-operator-subscription.yaml @@ -0,0 +1,22 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: gpu-operator-certified + namespace: nvidia-gpu-operator +spec: + targetNamespaces: + - opendatahub +--- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription 
+metadata: + name: gpu-operator-certified + labels: + operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator: '' + namespace: nvidia-gpu-operator +spec: + channel: stable + name: gpu-operator-certified + installPlanApproval: Automatic + source: certified-operators + sourceNamespace: openshift-marketplace diff --git a/contrib/configuration/opendatahub-operator-subscription.yaml b/contrib/configuration/opendatahub-operator-subscription.yaml new file mode 100644 index 000000000..6f7521595 --- /dev/null +++ b/contrib/configuration/opendatahub-operator-subscription.yaml @@ -0,0 +1,13 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: opendatahub-operator + labels: + operators.coreos.com/opendatahub-operator.openshift-operators: '' + namespace: openshift-operators +spec: + channel: fast + name: opendatahub-operator + installPlanApproval: Automatic + source: community-operators + sourceNamespace: openshift-marketplace diff --git a/contrib/configuration/rhoai-operator-subscription.yaml b/contrib/configuration/rhoai-operator-subscription.yaml new file mode 100644 index 000000000..e5c133c55 --- /dev/null +++ b/contrib/configuration/rhoai-operator-subscription.yaml @@ -0,0 +1,13 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: rhods-operator + labels: + operators.coreos.com/rhods-operator.redhat-ods-operator: '' + namespace: redhat-ods-operator +spec: + channel: fast + name: rhods-operator + installPlanApproval: Automatic + source: redhat-operators + sourceNamespace: openshift-marketplace From 4e76f691dbab764e665a373749fd4511b204a2bb Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 9 May 2024 16:08:37 +0100 Subject: [PATCH 2/9] Added additional GPU step for RHOAI --- Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 690f12493..323ce5865 100644 --- a/Makefile +++ b/Makefile @@ -521,11 +521,12 @@ install-nvidia-operator: ## Install 
nvidia operator @while [[ -z $$(oc get customresourcedefinition clusterpolicies.nvidia.com) ]]; do echo "."; sleep 10; done @while [[ -z $$(oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator) ]]; do echo "."; sleep 10; done oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq .[] | oc apply -f - -#ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator -# oc delete configmap migration-gpu-status -n redhat-ods-applications -# -export REPLICASET_NAME=`oc get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name` -# oc delete replicaset $$REPLICASET_NAME -n redhat-ods-applications -#endif +ifeq ($(USE_RHOAI), true) ## Additional steps required for RHOAI + oc delete configmap migration-gpu-status -n redhat-ods-applications --ignore-not-found=true + -export REPLICASET_NAME=`oc get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name`; \ + oc delete replicaset $$REPLICASET_NAME -n redhat-ods-applications +endif + .PHONY: delete-nvidia-operator delete-nvidia-operator: ## Delete nvidia operator @echo -e "\n==> Deleting ClusterPolicy CR \n" From 394f06f50c866133c362ad0c3bd7fa69d182f4db Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 13 May 2024 14:19:17 +0100 Subject: [PATCH 3/9] Converted oc to kubectl & updated resource structure --- Makefile | 85 ++++++++++--------- contrib/configuration/odh/default-dsc.yaml | 40 +++++++++ contrib/configuration/odh/default-dsci.yaml | 24 ++++++ .../opendatahub-operator-subscription.yaml | 0 .../{ => rhoai}/default-dsc.yaml | 0 contrib/configuration/rhoai/default-dsci.yaml | 24 ++++++ .../rhoai-operator-subscription.yaml | 0 7 files changed, 134 insertions(+), 39 deletions(-) create mode 100644 contrib/configuration/odh/default-dsc.yaml create mode 100644 
contrib/configuration/odh/default-dsci.yaml rename contrib/configuration/{ => odh}/opendatahub-operator-subscription.yaml (100%) rename contrib/configuration/{ => rhoai}/default-dsc.yaml (100%) create mode 100644 contrib/configuration/rhoai/default-dsci.yaml rename contrib/configuration/{ => rhoai}/rhoai-operator-subscription.yaml (100%) diff --git a/Makefile b/Makefile index 323ce5865..5a0e64c64 100644 --- a/Makefile +++ b/Makefile @@ -429,8 +429,8 @@ all-in-one: -make delete-nfd-operator -make delete-ai-platform-operator -make delete-nvidia-operator - -make install-ai-platform-operator -make install-nfd-operator + -make install-ai-platform-operator -make install-nvidia-operator .PHONY: delete-all-in-one @@ -461,79 +461,86 @@ endif .PHONY: delete-rhoai-operator delete-rhoai-operator: ## Delete RHOAI Operator @echo -e "\n==> Deleting OpenShift AI Operator \n" - -oc delete subscription rhods-operator -n redhat-ods-operator - -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name`; \ - oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator + -kubectl delete subscription rhods-operator -n redhat-ods-operator + -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name`; \ + kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator + kubectl delete namespace redhat-ods-operator .PHONY: install-rhoai-operator install-rhoai-operator: ## Install RHOAI Operator @echo -e "\n==> Installing OpenShift AI Operator \n" - -oc create ns redhat-ods-operator - oc create -f contrib/configuration/rhoai-operator-subscription.yaml + -kubectl create ns redhat-ods-operator + kubectl create -f contrib/configuration/rhoai/rhoai-operator-subscription.yaml @echo Waiting for rhoai-operator 
Subscription to be ready - oc wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + kubectl wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s @echo -e "\n==> Creating default Data Science Cluster \n" - oc apply -f contrib/configuration/default-dsc.yaml + kubectl apply -f contrib/configuration/rhoai/default-dsci.yaml --server-side + kubectl apply -f contrib/configuration/rhoai/default-dsc.yaml --server-side .PHONY: delete-opendatahub-operator delete-opendatahub-operator: ## Delete OpenDataHub operator @echo -e "\n==> Deleting OpenDataHub Operator \n" - -oc delete subscription opendatahub-operator -n openshift-operators - -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators -o custom-columns=:metadata.name`; \ - oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-operators + -kubectl delete subscription opendatahub-operator -n openshift-operators + -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators -o custom-columns=:metadata.name`; \ + kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-operators + -kubectl delete namespace opendatahub .PHONY: install-opendatahub-operator install-opendatahub-operator: ## Install OpenDataHub operator @echo -e "\n==> Installing OpenDataHub Operator \n" - -oc create ns opendatahub - oc create -f contrib/configuration/opendatahub-operator-subscription.yaml + -kubectl create ns opendatahub + kubectl create -f contrib/configuration/odh/opendatahub-operator-subscription.yaml @echo Waiting for opendatahub-operator Subscription to be ready - oc wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + 
kubectl wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + sleep 2 + kubectl wait --for=condition=available deployment/opendatahub-operator-controller-manager -n openshift-operators --timeout=180s + kubectl apply -f contrib/configuration/odh/default-dsci.yaml --server-side + kubectl apply -f contrib/configuration/odh/default-dsc.yaml --server-side ##@ GPU Support .PHONY: install-nfd-operator install-nfd-operator: ## Install NFD operator ( Node Feature Discovery ) @echo -e "\n==> Installing NFD Operator \n" - -oc create ns openshift-nfd - oc create -f contrib/configuration/nfd-operator-subscription.yaml + -kubectl create ns openshift-nfd + kubectl create -f contrib/configuration/nfd-operator-subscription.yaml @echo -e "\n==> Creating default NodeFeatureDiscovery CR \n" - @while [[ -z $$(oc get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io) ]]; do echo "."; sleep 10; done - @while [[ -z $$(oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd) ]]; do echo "."; sleep 10; done - oc get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | oc apply -f - + @while [[ -z $$(kubectl get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io) ]]; do echo "."; sleep 10; done + @while [[ -z $$(kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd) ]]; do echo "."; sleep 10; done + kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | kubectl apply -f - .PHONY: delete-nfd-operator delete-nfd-operator: ## Delete NFD operator @echo -e "\n==> Deleting NodeFeatureDiscovery CR \n" - oc delete NodeFeatureDiscovery --all -n openshift-nfd - @while [[ -n $$(oc get 
NodeFeatureDiscovery -n openshift-nfd) ]]; do echo "."; sleep 10; done + kubectl delete NodeFeatureDiscovery --all -n openshift-nfd + @while [[ -n $$(kubectl get NodeFeatureDiscovery -n openshift-nfd) ]]; do echo "."; sleep 10; done @echo -e "\n==> Deleting NFD Operator \n" - -oc delete subscription nfd -n openshift-nfd - -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n openshift-nfd -l operators.coreos.com/nfd.openshift-nfd -o custom-columns=:metadata.name`; \ - oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-nfd - -oc delete ns openshift-nfd + -kubectl delete subscription nfd -n openshift-nfd + -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n openshift-nfd -l operators.coreos.com/nfd.openshift-nfd -o custom-columns=:metadata.name`; \ + kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-nfd + -kubectl delete ns openshift-nfd .PHONY: install-nvidia-operator install-nvidia-operator: ## Install nvidia operator @echo -e "\n==> Installing nvidia Operator \n" - -oc create ns nvidia-gpu-operator - oc create -f contrib/configuration/nvidia-operator-subscription.yaml + -kubectl create ns nvidia-gpu-operator + kubectl create -f contrib/configuration/nvidia-operator-subscription.yaml @echo -e "\n==> Creating default ClusterPolicy CR \n" - @while [[ -z $$(oc get customresourcedefinition clusterpolicies.nvidia.com) ]]; do echo "."; sleep 10; done - @while [[ -z $$(oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator) ]]; do echo "."; sleep 10; done - oc get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq .[] | oc apply -f - + @while [[ -z $$(kubectl get customresourcedefinition clusterpolicies.nvidia.com) ]]; do echo "."; sleep 10; done + @while [[ -z $$(kubectl get csv -n nvidia-gpu-operator --selector 
operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator) ]]; do echo "."; sleep 10; done + kubectl get csv -n nvidia-gpu-operator --selector operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq .[] | kubectl apply -f - ifeq ($(USE_RHOAI), true) ## Additional steps required for RHOAI - oc delete configmap migration-gpu-status -n redhat-ods-applications --ignore-not-found=true - -export REPLICASET_NAME=`oc get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name`; \ - oc delete replicaset $$REPLICASET_NAME -n redhat-ods-applications + kubectl delete configmap migration-gpu-status -n redhat-ods-applications --ignore-not-found=true + -export REPLICASET_NAME=`kubectl get replicaset -n redhat-ods-applications -l app=rhods-dashboard -o custom-columns=:metadata.name`; \ + kubectl delete replicaset $$REPLICASET_NAME -n redhat-ods-applications endif .PHONY: delete-nvidia-operator delete-nvidia-operator: ## Delete nvidia operator @echo -e "\n==> Deleting ClusterPolicy CR \n" - oc delete ClusterPolicy --all -n nvidia-gpu-operator - @while [[ -n $$(oc get ClusterPolicy -n nvidia-gpu-operator) ]]; do echo "."; sleep 10; done + kubectl delete ClusterPolicy --all -n nvidia-gpu-operator + @while [[ -n $$(kubectl get ClusterPolicy -n nvidia-gpu-operator) ]]; do echo "."; sleep 10; done @echo -e "\n==> Deleting nvidia Operator \n" - -oc delete subscription gpu-operator-certified -n nvidia-gpu-operator - -export CLUSTER_SERVICE_VERSION=`oc get clusterserviceversion -n nvidia-gpu-operator -l operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -o custom-columns=:metadata.name`; \ - oc delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n nvidia-gpu-operator - -oc delete ns nvidia-gpu-operator + -kubectl delete subscription gpu-operator-certified -n nvidia-gpu-operator + -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n 
nvidia-gpu-operator -l operators.coreos.com/gpu-operator-certified.nvidia-gpu-operator -o custom-columns=:metadata.name`; \ + kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n nvidia-gpu-operator + -kubectl delete ns nvidia-gpu-operator diff --git a/contrib/configuration/odh/default-dsc.yaml b/contrib/configuration/odh/default-dsc.yaml new file mode 100644 index 000000000..d5e4d902a --- /dev/null +++ b/contrib/configuration/odh/default-dsc.yaml @@ -0,0 +1,40 @@ +kind: DataScienceCluster +apiVersion: datasciencecluster.opendatahub.io/v1 +metadata: + labels: + app.kubernetes.io/created-by: opendatahub-operator + app.kubernetes.io/instance: default + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: datasciencecluster + app.kubernetes.io/part-of: opendatahub-operator + name: default-dsc +spec: + components: + codeflare: + managementState: Managed + dashboard: + managementState: Managed + datasciencepipelines: + managementState: Managed + kserve: + managementState: Managed + serving: + ingressGateway: + certificate: + type: SelfSigned + managementState: Managed + name: knative-serving + kueue: + managementState: Managed + modelmeshserving: + managementState: Managed + modelregistry: + managementState: Removed + ray: + managementState: Managed + trainingoperator: + managementState: Removed + trustyai: + managementState: Managed + workbenches: + managementState: Managed diff --git a/contrib/configuration/odh/default-dsci.yaml b/contrib/configuration/odh/default-dsci.yaml new file mode 100644 index 000000000..e9f1b2995 --- /dev/null +++ b/contrib/configuration/odh/default-dsci.yaml @@ -0,0 +1,24 @@ +kind: DSCInitialization +apiVersion: dscinitialization.opendatahub.io/v1 +metadata: + labels: + app.kubernetes.io/created-by: opendatahub-operator + app.kubernetes.io/instance: default + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: dscinitialization + app.kubernetes.io/part-of: opendatahub-operator + name: default-dsci 
+spec: + applicationsNamespace: opendatahub + monitoring: + managementState: Managed + namespace: opendatahub + serviceMesh: + controlPlane: + metricsCollection: Istio + name: data-science-smcp + namespace: istio-system + managementState: Managed + trustedCABundle: + customCABundle: '' + managementState: Managed diff --git a/contrib/configuration/opendatahub-operator-subscription.yaml b/contrib/configuration/odh/opendatahub-operator-subscription.yaml similarity index 100% rename from contrib/configuration/opendatahub-operator-subscription.yaml rename to contrib/configuration/odh/opendatahub-operator-subscription.yaml diff --git a/contrib/configuration/default-dsc.yaml b/contrib/configuration/rhoai/default-dsc.yaml similarity index 100% rename from contrib/configuration/default-dsc.yaml rename to contrib/configuration/rhoai/default-dsc.yaml diff --git a/contrib/configuration/rhoai/default-dsci.yaml b/contrib/configuration/rhoai/default-dsci.yaml new file mode 100644 index 000000000..91d3d1b00 --- /dev/null +++ b/contrib/configuration/rhoai/default-dsci.yaml @@ -0,0 +1,24 @@ +kind: DSCInitialization +apiVersion: dscinitialization.opendatahub.io/v1 +metadata: + labels: + app.kubernetes.io/created-by: rhods-operator + app.kubernetes.io/instance: default-dsci + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: dscinitialization + app.kubernetes.io/part-of: rhods-operator + name: default-dsci +spec: + applicationsNamespace: redhat-ods-applications + monitoring: + managementState: Managed + namespace: redhat-ods-monitoring + serviceMesh: + controlPlane: + metricsCollection: Istio + name: data-science-smcp + namespace: istio-system + managementState: Managed + trustedCABundle: + customCABundle: '' + managementState: Managed diff --git a/contrib/configuration/rhoai-operator-subscription.yaml b/contrib/configuration/rhoai/rhoai-operator-subscription.yaml similarity index 100% rename from contrib/configuration/rhoai-operator-subscription.yaml rename to 
contrib/configuration/rhoai/rhoai-operator-subscription.yaml From 60861f86b37d90f133cf40e41ca0297b7c4a7609 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 13 May 2024 14:46:43 +0100 Subject: [PATCH 4/9] Added Operator group for RHOAI --- .../configuration/rhoai/rhoai-operator-subscription.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/configuration/rhoai/rhoai-operator-subscription.yaml b/contrib/configuration/rhoai/rhoai-operator-subscription.yaml index e5c133c55..15373ca51 100644 --- a/contrib/configuration/rhoai/rhoai-operator-subscription.yaml +++ b/contrib/configuration/rhoai/rhoai-operator-subscription.yaml @@ -1,3 +1,9 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: rhods-operator + namespace: redhat-ods-operator +--- apiVersion: operators.coreos.com/v1alpha1 kind: Subscription metadata: From 7dd92d73f5d1cde7a7636da57747bb428c590261 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 13 May 2024 15:46:43 +0100 Subject: [PATCH 5/9] Review comments: unnecessary function call Review changes: removal of unnecessary deletion --- Makefile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 5a0e64c64..79f993ce0 100644 --- a/Makefile +++ b/Makefile @@ -422,13 +422,16 @@ kueue-setup: bash scripts/setup-kueue-resources.sh # RHOAI/ODH related resources installation +# Basic Usage +# all-in-one will create all resources necessary to create GPU enabled ML workloads via OpenShift AI +# Users have the choice between installing RHOAI and ODH +# For RHOAI use `make all-in-one` and to remove all of the operators run `make delete-all-in-one` +# For ODH use `make all-in-one -e USE_RHOAI=false` and to remove all of the operators run `make delete-all-in-one -e USE_RHOAI=false` + ##@ all-in-one .PHONY: all-in-one all-in-one: @echo -e "\n ==> Installing Everything needed for distributed AI platform on OpenShift cluster \n" - -make delete-nfd-operator - -make 
delete-ai-platform-operator - -make delete-nvidia-operator -make install-nfd-operator -make install-ai-platform-operator -make install-nvidia-operator @@ -436,10 +439,9 @@ all-in-one: .PHONY: delete-all-in-one delete-all-in-one: @echo -e "\n ==> Removing Everything needed for distributed AI platform on OpenShift cluster \n" - -make delete-rhoai -make delete-nfd-operator - -make delete-nvidia-operator -make delete-ai-platform-operator + -make delete-nvidia-operator ##@ general .PHONY: delete-ai-platform-operator @@ -460,7 +462,7 @@ endif .PHONY: delete-rhoai-operator delete-rhoai-operator: ## Delete RHOAI Operator - @echo -e "\n==> Deleting OpenShift AI Operator \n" + @echo -e "\n ==> Deleting OpenShift AI Operator \n" -kubectl delete subscription rhods-operator -n redhat-ods-operator -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name`; \ kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator @@ -468,7 +470,7 @@ delete-rhoai-operator: ## Delete RHOAI Operator .PHONY: install-rhoai-operator install-rhoai-operator: ## Install RHOAI Operator - @echo -e "\n==> Installing OpenShift AI Operator \n" + @echo -e "\n ==> Installing OpenShift AI Operator \n" -kubectl create ns redhat-ods-operator kubectl create -f contrib/configuration/rhoai/rhoai-operator-subscription.yaml @echo Waiting for rhoai-operator Subscription to be ready From e019bf143cb838df646abd779654268b18302243 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Tue, 14 May 2024 14:17:53 +0100 Subject: [PATCH 6/9] Fixed NFD --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 79f993ce0..5bfe77903 100644 --- a/Makefile +++ b/Makefile @@ -508,7 +508,7 @@ install-nfd-operator: ## Install NFD operator ( Node Feature Discovery ) @echo -e "\n==> Creating default NodeFeatureDiscovery CR \n" @while [[ -z 
$$(kubectl get customresourcedefinition nodefeaturediscoveries.nfd.openshift.io) ]]; do echo "."; sleep 10; done @while [[ -z $$(kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd) ]]; do echo "."; sleep 10; done - kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | kubectl apply -f - + kubectl get csv -n openshift-nfd --selector operators.coreos.com/nfd.openshift-nfd -ojsonpath={.items[0].metadata.annotations.alm-examples} | jq '.[] | select(.kind=="NodeFeatureDiscovery")' | kubectl apply -f - --validate=false .PHONY: delete-nfd-operator delete-nfd-operator: ## Delete NFD operator From 321022d9edd62904acd474e794fc06bda55afcb9 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Mon, 27 May 2024 16:52:52 +0100 Subject: [PATCH 7/9] Added Service Mesh Operator && DSC deletion --- Makefile | 29 ++++++++++++++++++- .../service-mesh-operator-subscription.yaml | 13 +++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 contrib/configuration/service-mesh-operator-subscription.yaml diff --git a/Makefile b/Makefile index 5bfe77903..5de676ba5 100644 --- a/Makefile +++ b/Makefile @@ -433,6 +433,7 @@ kueue-setup: all-in-one: @echo -e "\n ==> Installing Everything needed for distributed AI platform on OpenShift cluster \n" -make install-nfd-operator + -make install-service-mesh-operator -make install-ai-platform-operator -make install-nvidia-operator @@ -441,6 +442,7 @@ delete-all-in-one: @echo -e "\n ==> Removing Everything needed for distributed AI platform on OpenShift cluster \n" -make delete-nfd-operator -make delete-ai-platform-operator + -make delete-service-mesh-operator -make delete-nvidia-operator ##@ general @@ -463,6 +465,10 @@ endif .PHONY: delete-rhoai-operator delete-rhoai-operator: ## Delete RHOAI Operator @echo -e "\n ==> Deleting OpenShift AI Operator \n" + kubectl delete 
datasciencecluster/default-dsc + kubectl wait --for=delete datasciencecluster/default-dsc --timeout=180s + kubectl delete dsci/default-dsci + kubectl wait --for=delete dsci/default-dsci --timeout=180s -kubectl delete subscription rhods-operator -n redhat-ods-operator -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n redhat-ods-operator -l operators.coreos.com/rhods-operator.redhat-ods-operator -o custom-columns=:metadata.name`; \ kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n redhat-ods-operator @@ -475,6 +481,8 @@ install-rhoai-operator: ## Install RHOAI Operator kubectl create -f contrib/configuration/rhoai/rhoai-operator-subscription.yaml @echo Waiting for rhoai-operator Subscription to be ready kubectl wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + -export RHOAI_POD_NAME=`kubectl get -n redhat-ods-operator pod -o custom-columns=:metadata.name | grep rhods-operator`; \ + kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' pod/$$RHOAI_POD_NAME -n redhat-ods-operator @echo -e "\n==> Creating default Data Science Cluster \n" kubectl apply -f contrib/configuration/rhoai/default-dsci.yaml --server-side kubectl apply -f contrib/configuration/rhoai/default-dsc.yaml --server-side @@ -482,6 +490,10 @@ install-rhoai-operator: ## Install RHOAI Operator .PHONY: delete-opendatahub-operator delete-opendatahub-operator: ## Delete OpenDataHub operator @echo -e "\n==> Deleting OpenDataHub Operator \n" + kubectl delete datasciencecluster/default-dsc + kubectl wait --for=delete datasciencecluster/default-dsc --timeout=180s + kubectl delete dsci/default-dsci + kubectl wait --for=delete dsci/default-dsci --timeout=180s -kubectl delete subscription opendatahub-operator -n openshift-operators -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n openshift-operators -l operators.coreos.com/opendatahub-operator.openshift-operators 
-o custom-columns=:metadata.name`; \ kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-operators @@ -494,11 +506,26 @@ install-opendatahub-operator: ## Install OpenDataHub operator kubectl create -f contrib/configuration/odh/opendatahub-operator-subscription.yaml @echo Waiting for opendatahub-operator Subscription to be ready kubectl wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s - sleep 2 kubectl wait --for=condition=available deployment/opendatahub-operator-controller-manager -n openshift-operators --timeout=180s + -export ODH_POD_NAME=`kubectl get -n openshift-operators pod -o custom-columns=:metadata.name | grep opendatahub-operator-controller-manager`; \ + kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' pod/$$ODH_POD_NAME -n openshift-operators kubectl apply -f contrib/configuration/odh/default-dsci.yaml --server-side kubectl apply -f contrib/configuration/odh/default-dsc.yaml --server-side +.PHONY: delete-service-mesh-operator +delete-service-mesh-operator: ## Delete Service Mesh Operator + @echo -e "\n==> Deleting Service Mesh Operator \n" + kubectl delete subscription servicemeshoperator -n openshift-operators + -export CLUSTER_SERVICE_VERSION=`kubectl get clusterserviceversion -n openshift-operators -l operators.coreos.com/servicemeshoperator.openshift-operators -o custom-columns=:metadata.name`; \ + kubectl delete clusterserviceversion $$CLUSTER_SERVICE_VERSION -n openshift-operators + +.PHONY: install-service-mesh-operator +install-service-mesh-operator: ## Install Service Mesh Operator + @echo -e "\n==> Installing OpenShift Service Mesh Operator" + kubectl create -f contrib/configuration/service-mesh-operator-subscription.yaml + kubectl wait -n openshift-operators subscription/servicemeshoperator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + kubectl wait --for=condition=available 
deployment/istio-operator -n openshift-operators --timeout=180s + ##@ GPU Support .PHONY: install-nfd-operator install-nfd-operator: ## Install NFD operator ( Node Feature Discovery ) diff --git a/contrib/configuration/service-mesh-operator-subscription.yaml b/contrib/configuration/service-mesh-operator-subscription.yaml new file mode 100644 index 000000000..9e21a4c77 --- /dev/null +++ b/contrib/configuration/service-mesh-operator-subscription.yaml @@ -0,0 +1,13 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: servicemeshoperator + labels: + operators.coreos.com/servicemeshoperator.openshift-operators: '' + namespace: openshift-operators +spec: + channel: stable + name: servicemeshoperator + installPlanApproval: Automatic + source: redhat-operators + sourceNamespace: openshift-marketplace From 3f6e0efe9cf5afbba2c04a28d6b367824fd82b78 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Tue, 28 May 2024 14:14:24 +0100 Subject: [PATCH 8/9] Improved wait conditions for Operator deployments --- Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5de676ba5..bc47fa7d0 100644 --- a/Makefile +++ b/Makefile @@ -481,8 +481,9 @@ install-rhoai-operator: ## Install RHOAI Operator kubectl create -f contrib/configuration/rhoai/rhoai-operator-subscription.yaml @echo Waiting for rhoai-operator Subscription to be ready kubectl wait -n redhat-ods-operator subscription/rhods-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + @while [[ -z $$(kubectl get deployment/rhods-operator -n redhat-ods-operator) ]]; do echo "."; sleep 10; done -export RHOAI_POD_NAME=`kubectl get -n redhat-ods-operator pod -o custom-columns=:metadata.name | grep rhods-operator`; \ - kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' pod/$$RHOAI_POD_NAME -n redhat-ods-operator + kubectl wait --for=condition=Ready pod/$$RHOAI_POD_NAME -n redhat-ods-operator @echo -e "\n==> 
Creating default Data Science Cluster \n" kubectl apply -f contrib/configuration/rhoai/default-dsci.yaml --server-side kubectl apply -f contrib/configuration/rhoai/default-dsc.yaml --server-side @@ -506,9 +507,10 @@ install-opendatahub-operator: ## Install OpenDataHub operator kubectl create -f contrib/configuration/odh/opendatahub-operator-subscription.yaml @echo Waiting for opendatahub-operator Subscription to be ready kubectl wait -n openshift-operators subscription/opendatahub-operator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + @while [[ -z $$(kubectl get deployment/opendatahub-operator-controller-manager -n openshift-operators) ]]; do echo "."; sleep 10; done kubectl wait --for=condition=available deployment/opendatahub-operator-controller-manager -n openshift-operators --timeout=180s -export ODH_POD_NAME=`kubectl get -n openshift-operators pod -o custom-columns=:metadata.name | grep opendatahub-operator-controller-manager`; \ - kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' pod/$$ODH_POD_NAME -n openshift-operators + kubectl wait --for=condition=Ready pod/$$ODH_POD_NAME -n openshift-operators kubectl apply -f contrib/configuration/odh/default-dsci.yaml --server-side kubectl apply -f contrib/configuration/odh/default-dsc.yaml --server-side @@ -524,6 +526,7 @@ install-service-mesh-operator: ## Install Service Mesh Operator @echo -e "\n==> Installing OpenShift Service Mesh Operator" kubectl create -f contrib/configuration/service-mesh-operator-subscription.yaml kubectl wait -n openshift-operators subscription/servicemeshoperator --for=jsonpath='{.status.state}'=AtLatestKnown --timeout=180s + @while [[ -z $$(kubectl get deployment/istio-operator -n openshift-operators) ]]; do echo "."; sleep 10; done kubectl wait --for=condition=available deployment/istio-operator -n openshift-operators --timeout=180s ##@ GPU Support From 0b42d4dc68c28798eeaa50b72e1164ea355169b1 Mon Sep 17 00:00:00 2001 From: Bobbins228 
Date: Wed, 29 May 2024 12:44:11 +0100 Subject: [PATCH 9/9] Added custom accelerator and removal of nvidia driver --- Makefile | 5 +++++ contrib/configuration/accelerator-profile.yaml | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 contrib/configuration/accelerator-profile.yaml diff --git a/Makefile b/Makefile index bc47fa7d0..203b02d17 100644 --- a/Makefile +++ b/Makefile @@ -450,16 +450,20 @@ delete-all-in-one: delete-ai-platform-operator: ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator -make delete-rhoai-operator + -kubectl delete -f contrib/configuration/accelerator-profile.yaml -n redhat-ods-applications else ## Delete Open Data Hub Operator -make delete-opendatahub-operator + -kubectl delete -f contrib/configuration/accelerator-profile.yaml -n opendatahub endif .PHONY: install-ai-platform-operator install-ai-platform-operator: ifeq ($(USE_RHOAI), true) ## Delete RHOAI Operator -make install-rhoai-operator + -kubectl apply -f contrib/configuration/accelerator-profile.yaml -n redhat-ods-applications else ## Delete Open Data Hub Operator -make install-opendatahub-operator + -kubectl apply -f contrib/configuration/accelerator-profile.yaml -n opendatahub endif .PHONY: delete-rhoai-operator @@ -569,6 +573,7 @@ endif .PHONY: delete-nvidia-operator delete-nvidia-operator: ## Delete nvidia operator @echo -e "\n==> Deleting ClusterPolicy CR \n" + kubectl delete --ignore-not-found=true NVIDIADriver gpu-driver kubectl delete ClusterPolicy --all -n nvidia-gpu-operator @while [[ -n $$(kubectl get ClusterPolicy -n nvidia-gpu-operator) ]]; do echo "."; sleep 10; done @echo -e "\n==> Deleting nvidia Operator \n" diff --git a/contrib/configuration/accelerator-profile.yaml b/contrib/configuration/accelerator-profile.yaml new file mode 100644 index 000000000..37f64ea04 --- /dev/null +++ b/contrib/configuration/accelerator-profile.yaml @@ -0,0 +1,9 @@ +apiVersion: dashboard.opendatahub.io/v1 +kind: AcceleratorProfile +metadata: + name: gpu-accelerator-profile 
+spec: { + displayName: nvidia-gpu, + enabled: true, + identifier: nvidia.com/gpu +}