@@ -373,41 +373,53 @@ function setup-mcad-env {
373
373
do
374
374
echo -n " ." && sleep 1;
375
375
done
376
-
377
376
}
378
377
379
378
function extend-resources {
380
379
# Patch nodes to advertise GPU resources without requiring physical GPUs.
381
380
# This is intended to allow testing of GPU-specific features such as histograms.
382
381
383
382
# Start communication with cluster
384
- echo -n " Starting proxy "
385
-
386
383
kubectl proxy > /dev/null 2>&1 &
387
- PROXY_PID=$!
384
+ proxy_pid=$!
385
+
386
+ echo " Starting background proxy connection (pid=${proxy_pid} )..."
387
+
388
+ curl 127.0.0.1:8001 > /dev/null 2>&1
389
+
390
+ if [[ ! $? -eq 0 ]]; then
391
+ echo " Calling 'kubectl proxy' did not create a successful connection to the kubelet needed to patch the nodes. Exiting."
392
+ exit 1
393
+ else
394
+ echo " Connected to the kubelet for patching the nodes"
395
+ fi
388
396
389
- echo " (pid=${PROXY_PID} )..."
390
397
391
398
# Variables
392
- RESOURCE_NAME =" nvidia.com~1gpu"
393
- RESOURCE_COUNT =" 8"
399
+ resource_name =" nvidia.com~1gpu"
400
+ resource_count =" 8"
394
401
395
402
# Patch nodes
396
- for NODE_NAME in $( kubectl get nodes --no-headers -o custom-columns=" :metadata.name" )
403
+ for node_name in $( kubectl get nodes --no-headers -o custom-columns=" :metadata.name" )
397
404
do
398
- echo " - Patching node (add): ${NODE_NAME} "
405
+ echo " - Patching node (add): ${node_name} "
406
+
407
+ patching_status=$( curl --header " Content-Type: application/json-patch+json" \
408
+ --request PATCH \
409
+ --data ' [{"op": "add", "path": "/status/capacity/' ${resource_name} ' ", "value": "' ${resource_count} ' "}]' \
410
+ http://localhost:8001/api/v1/nodes/${node_name} /status | jq -r ' .status' )
399
411
400
- curl --header " Content-Type: application/json-patch+json " \
401
- --request PATCH \
402
- --data ' [{"op": "add", "path": "/status/capacity/ ' ${RESOURCE_NAME} ' ", "value": " ' ${RESOURCE_COUNT} ' "}] ' \
403
- http://localhost:8001/api/v1/nodes/ ${NODE_NAME} /status
412
+ if [[ ${patching_status} = " Failure " ]] ; then
413
+ echo " Failed to patch node ' ${node_name} ' with GPU resources "
414
+ exit 1
415
+ fi
404
416
405
417
echo
406
418
done
407
419
408
420
# Stop communication with cluster
409
- echo " Killing proxy (pid=${PROXY_PID } )..."
410
- kill ${PROXY_PID }
421
+ echo " Killing proxy (pid=${proxy_pid } )..."
422
+ kill -9 ${proxy_pid }
411
423
412
424
# Run kuttl tests to confirm GPUs were added correctly
413
425
kuttl_test=" ${ROOT_DIR} /test/kuttl-test-extended-resources.yaml"
0 commit comments