From 4803f7c089192878039abfa6dfc2020a3e24fc30 Mon Sep 17 00:00:00 2001
From: David Grove
Date: Wed, 26 Jun 2024 14:04:20 -0400
Subject: [PATCH] Deploy Kueue configured with AppWrapper as an externalFramework

---
Review notes (text between the "---" line above and the diffstat is not part
of the commit message):

hack/deploy-kueue.sh, relative to the first version of this patch:
 * every expansion used as a path is quoted, so a checkout whose path contains
   whitespace no longer breaks wget, patch or kubectl;
 * the manifest is downloaded into a mktemp directory that an EXIT trap
   removes, so it is cleaned up even when wget or patch fails (previously the
   "exit 1" paths skipped the final rm and left hack/kueue-manifest.yaml in
   the source tree);
 * a failed download now aborts immediately instead of being handed to patch.

NOTE(review): only this hunk of deploy-kueue.sh is visible here; confirm the
script installs no other EXIT trap that the new one would replace.
NOTE(review): this patch was re-wrapped from a whitespace-collapsed copy. The
leading whitespace of the manifests.yaml lines inside hack/kueue-patches/*.txt
was restored on a best-effort basis; verify it against the Kueue v0.7.0
manifests.yaml before applying.

 hack/deploy-kueue.sh                          | 15 +++++++++-
 hack/kueue-patches/01-manage-all-jobs.txt     | 11 ++++++++
 .../02-aw-external-frameworks.txt             | 28 +++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 hack/kueue-patches/01-manage-all-jobs.txt
 create mode 100644 hack/kueue-patches/02-aw-external-frameworks.txt

diff --git a/hack/deploy-kueue.sh b/hack/deploy-kueue.sh
index fbd3f92..faa858a 100755
--- a/hack/deploy-kueue.sh
+++ b/hack/deploy-kueue.sh
@@ -18,8 +18,21 @@ KUEUE_VERSION=v0.7.0
 
 export ROOT_DIR="$(dirname "$(dirname "$(readlink -fn "$0")")")"
 
+# Kueue is deployed from a locally patched copy of the release manifests so
+# that AppWrapper can be registered as an externalFramework. The copy, and any
+# backup or reject files patch may write next to it, live in a private temp
+# directory that is removed on every exit path.
+KUEUE_TMPDIR="$(mktemp -d)" || exit 1
+trap 'rm -rf -- "$KUEUE_TMPDIR"' EXIT
+KUEUE_MANIFEST="$KUEUE_TMPDIR/manifests.yaml"
+
+echo "Downloading and patching Kueue ${KUEUE_VERSION} manifests"
+wget -q "https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml" -O "$KUEUE_MANIFEST" || exit 1
+patch -p 0 "$KUEUE_MANIFEST" < "$ROOT_DIR/hack/kueue-patches/01-manage-all-jobs.txt" || exit 1
+patch -p 0 "$KUEUE_MANIFEST" < "$ROOT_DIR/hack/kueue-patches/02-aw-external-frameworks.txt" || exit 1
+
 echo "Deploying Kueue version $KUEUE_VERSION"
-kubectl apply --server-side -f https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml
+kubectl apply --server-side -f "$KUEUE_MANIFEST"
 
 # Sleep until the kueue manager is running
 echo "Waiting for pods in the kueue-system namespace to become ready"
diff --git a/hack/kueue-patches/01-manage-all-jobs.txt b/hack/kueue-patches/01-manage-all-jobs.txt
new file mode 100644
index 0000000..ee11c34
--- /dev/null
+++ b/hack/kueue-patches/01-manage-all-jobs.txt
@@ -0,0 +1,11 @@
+--- manifests.yaml 2024-06-26 13:58:48.132795505 -0400
++++ manifests.yaml 2024-06-26 13:59:54.945553273 -0400
+@@ -11878,7 +11878,7 @@
+     #    backoffLimitCount: null # null indicates infinite requeuing
+     #    backoffBaseSeconds: 60
+     #    backoffMaxSeconds: 3600
+-    #manageJobsWithoutQueueName: true
++    manageJobsWithoutQueueName: true
+     #internalCertManagement:
+     #  enable: false
+     #  webhookServiceName: ""
diff --git a/hack/kueue-patches/02-aw-external-frameworks.txt b/hack/kueue-patches/02-aw-external-frameworks.txt
new file mode 100644
index 0000000..095e979
--- /dev/null
+++ b/hack/kueue-patches/02-aw-external-frameworks.txt
@@ -0,0 +1,28 @@
+--- manifests.yaml 2024-06-26 13:59:54.945553273 -0400
++++ manifests.yaml 2024-06-26 14:02:25.889855296 -0400
+@@ -11225,6 +11225,14 @@
+   - get
+   - list
+   - watch
++- apiGroups:
++  - workload.codeflare.dev
++  resources:
++  - appwrappers
++  verbs:
++  - get
++  - list
++  - watch
+ ---
+ apiVersion: rbac.authorization.k8s.io/v1
+ kind: ClusterRole
+@@ -11896,8 +11904,8 @@
+       - "kubeflow.org/tfjob"
+       - "kubeflow.org/xgboostjob"
+     #  - "pod"
+-    #  externalFrameworks:
+-    #  - "Foo.v1.example.com"
++      externalFrameworks:
++      - "AppWrapper.v1beta2.workload.codeflare.dev"
+     #  podOptions:
+     #    namespaceSelector:
+     #      matchExpressions: