From af9df5ba09714ae88fc7c131982b9aaf51ad12c0 Mon Sep 17 00:00:00 2001 From: maxusmusti Date: Mon, 31 Oct 2022 17:50:40 -0400 Subject: [PATCH 1/2] Updated default head node specs and my_pod_ip env --- src/codeflare_sdk/templates/new-template.yaml | 33 ++----------------- src/codeflare_sdk/utils/generate_yaml.py | 14 +++++--- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/src/codeflare_sdk/templates/new-template.yaml b/src/codeflare_sdk/templates/new-template.yaml index e4428a901..3a550bd18 100644 --- a/src/codeflare_sdk/templates/new-template.yaml +++ b/src/codeflare_sdk/templates/new-template.yaml @@ -116,22 +116,6 @@ spec: # The Ray head pod - name: ray-head image: rayproject/ray:latest - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: s3-creds - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: s3-creds - key: AWS_SECRET_ACCESS_KEY - - name: ENDPOINT_URL - valueFrom: - secretKeyRef: - name: s3-creds - key: ENDPOINT_URL imagePullPolicy: Always ports: - containerPort: 6379 @@ -202,21 +186,10 @@ spec: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: rayproject/ray:latest env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: s3-creds - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: s3-creds - key: AWS_SECRET_ACCESS_KEY - - name: ENDPOINT_URL + - name: MY_POD_IP valueFrom: - secretKeyRef: - name: s3-creds - key: ENDPOINT_URL + fieldRef: + fieldPath: status.podIP # environment variables to set in the container.Optional. # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ lifecycle: diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 82ea6bb05..2898090f4 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -50,6 +50,9 @@ def update_custompodresources(item, min_cpu, max_cpu, min_memory, max_memory, gp if 'custompodresources' in item.keys(): custompodresources = item.get('custompodresources') for i in range(len(custompodresources)): + if i == 0: + # Leave head node resources as template default + continue resource = custompodresources[i] for k,v in resource.items(): if k == "replicas" and i == 1: @@ -90,10 +93,11 @@ def update_image(spec, image): def update_env(spec, env): containers = spec.get("containers") for container in containers: - if not env: - container.pop("env") - else: - container["env"] = env + if env: + if "env" in container: + container["env"].extend(env) + else: + container["env"] = env def update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu): container = spec.get("containers") @@ -126,7 +130,7 @@ def update_nodes(item, appwrapper_name, min_cpu, max_cpu, min_memory, max_memory update_image(spec, image) update_env(spec, env) if comp == head: - update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, 0) + update_resources(spec, 2, 2, 8, 8, 0) else: update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu) From 250fb2ed5c2e8809ae9ec7c181638734a0c7be08 Mon Sep 17 00:00:00 2001 From: maxusmusti Date: Mon, 31 Oct 2022 17:59:27 -0400 Subject: [PATCH 2/2] Uniform default --- src/codeflare_sdk/templates/new-template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codeflare_sdk/templates/new-template.yaml b/src/codeflare_sdk/templates/new-template.yaml index 3a550bd18..28861e1f0 100644 --- a/src/codeflare_sdk/templates/new-template.yaml +++ b/src/codeflare_sdk/templates/new-template.yaml @@ -17,11 +17,11 @@ spec: - replicas: 1 requests: cpu: 2 - memory: 12G + memory: 8G nvidia.com/gpu: 0 limits: cpu: 2 - memory: 12G + memory: 8G nvidia.com/gpu: 0 - replicas: 3 requests: