@@ -36,7 +36,6 @@ package e2e
36
36
import (
37
37
"fmt"
38
38
"os"
39
-
40
39
"time"
41
40
42
41
. "github.com/onsi/ginkgo"
@@ -140,7 +139,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
140
139
Expect (err ).NotTo (HaveOccurred ())
141
140
})
142
141
143
- It ("MCAD CPU Requeuing Test" , func () {
142
+ It ("MCAD CPU Requeuing - Completion After enough Requeuing Times Test" , func () {
144
143
fmt .Fprintf (os .Stdout , "[e2e] MCAD CPU Requeuing Test - Started.\n " )
145
144
146
145
context := initTestContext ()
@@ -153,13 +152,31 @@ var _ = Describe("AppWrapper E2E Test", func() {
153
152
// The job should be requeued 3 times before it finishes since the wait time is doubled each time the job is requeued (i.e., initially it waits
154
153
// for 1 minute before requeuing, then 2 minutes, and then 4 minutes). Since the init containers take 3 minutes
155
154
// and 20 seconds to finish, a 4 minute wait should be long enough to finish the job successfully
156
- aw := createJobAWWithInitContainer (context , "aw-job-3-init-container" )
155
+ aw := createJobAWWithInitContainer (context , "aw-job-3-init-container" , 60 , "exponential" , 0 )
157
156
appwrappers = append (appwrappers , aw )
158
157
159
- err := waitAWPodsCompleted (context , aw ) // This test waits for 10 minutes to make sure all PODs complete
158
+ err := waitAWPodsCompleted (context , aw , 600 * time . Second ) // This test waits for 10 minutes to make sure all PODs complete
160
159
Expect (err ).NotTo (HaveOccurred ())
161
160
})
162
161
162
+ It ("MCAD CPU Requeuing - Deletion After Maximum Requeuing Times Test" , func () {
163
+ fmt .Fprintf (os .Stdout , "[e2e] MCAD CPU Requeuing Test - Started.\n " )
164
+
165
+ context := initTestContext ()
166
+ var appwrappers []* arbv1.AppWrapper
167
+ appwrappersPtr := & appwrappers
168
+ defer cleanupTestObjectsPtr (context , appwrappersPtr )
169
+
170
+ // Create a job with init containers that need 200 seconds to be ready before the container starts.
171
+ // The requeuing mechanism is set to fire after 1 second (plus the 60-second interval of the background thread).
172
+ // Within 5 minutes, the AppWrapper will be requeued up to 3 times, at which point it will be deleted.
173
+ aw := createJobAWWithInitContainer (context , "aw-job-3-init-container" , 1 , "none" , 3 )
174
+ appwrappers = append (appwrappers , aw )
175
+
176
+ err := waitAWPodsCompleted (context , aw , 300 * time .Second )
177
+ Expect (err ).To (HaveOccurred ())
178
+ })
179
+
163
180
It ("Create AppWrapper - StatefulSet Only - 2 Pods" , func () {
164
181
fmt .Fprintf (os .Stdout , "[e2e] Create AppWrapper - StatefulSet Only - 2 Pods - Started.\n " )
165
182
@@ -371,7 +388,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
371
388
372
389
// This should fit on cluster with customPodResources matching deployment resource demands so AW pods are created
373
390
aw := createGenericDeploymentCustomPodResourcesWithCPUAW (
374
- context , "aw-deployment-2-550-vs-550-cpu" , "550m" , "550m" , 2 , 1 )
391
+ context , "aw-deployment-2-550-vs-550-cpu" , "550m" , "550m" , 2 , 60 )
375
392
376
393
appwrappers = append (appwrappers , aw )
377
394
@@ -399,7 +416,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
399
416
400
417
// This should not fit on any node but should dispatch because there is enough aggregated resources.
401
418
aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW (
402
- context , "aw-ff-deployment-1-700-cpu" , "700m" , "700m" , 1 , 1 )
419
+ context , "aw-ff-deployment-1-700-cpu" , "700m" , "700m" , 1 , 60 )
403
420
404
421
appwrappers = append (appwrappers , aw2 )
405
422
@@ -412,7 +429,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
412
429
// This should fit on cluster after AW aw-ff-deployment-1-700-cpu above is automatically preempted on
413
430
// scheduling failure
414
431
aw3 := createGenericDeploymentCustomPodResourcesWithCPUAW (
415
- context , "aw-ff-deployment-2-340-cpu" , "340m" , "340m" , 2 , 1 )
432
+ context , "aw-ff-deployment-2-340-cpu" , "340m" , "340m" , 2 , 60 )
416
433
417
434
appwrappers = append (appwrappers , aw3 )
418
435
@@ -445,7 +462,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
445
462
446
463
// This should not fit on cluster but customPodResources is incorrect so AW pods are created
447
464
aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW (
448
- context , "aw-deployment-2-425-vs-426-cpu" , "425m" , "426m" , 2 , 1 )
465
+ context , "aw-deployment-2-425-vs-426-cpu" , "425m" , "426m" , 2 , 60 )
449
466
450
467
appwrappers = append (appwrappers , aw2 )
451
468
@@ -472,7 +489,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
472
489
473
490
// This should fit on cluster but customPodResources is incorrect so AW pods are not created
474
491
aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW (
475
- context , "aw-deployment-2-426-vs-425-cpu" , "426m" , "425m" , 2 , 1 )
492
+ context , "aw-deployment-2-426-vs-425-cpu" , "426m" , "425m" , 2 , 60 )
476
493
477
494
appwrappers = append (appwrappers , aw2 )
478
495
@@ -517,7 +534,7 @@ var _ = Describe("AppWrapper E2E Test", func() {
517
534
aw := createGenericJobAWWithScheduleSpec (context , "aw-test-job-with-scheduling-spec" )
518
535
err1 := waitAWPodsReady (context , aw )
519
536
Expect (err1 ).NotTo (HaveOccurred ())
520
- err2 := waitAWPodsCompleted (context , aw )
537
+ err2 := waitAWPodsCompleted (context , aw , 90 * time . Second )
521
538
Expect (err2 ).NotTo (HaveOccurred ())
522
539
523
540
// Once pods are completed, we wait for them to see if they change their status to anything BUT "Completed"
0 commit comments