@@ -6,12 +6,8 @@ import (
6
6
7
7
. "github.com/onsi/gomega"
8
8
mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
9
-
10
9
batchv1 "k8s.io/api/batch/v1"
11
10
corev1 "k8s.io/api/core/v1"
12
- "k8s.io/apimachinery/pkg/api/resource"
13
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14
-
15
11
. "github.com/project-codeflare/codeflare-operator/test/support"
16
12
)
17
13
@@ -23,41 +19,13 @@ func TestInstascaleMachinePool(t *testing.T) {
23
19
namespace := test .NewTestNamespace ()
24
20
25
21
// Test configuration
26
- config := & corev1.ConfigMap {
27
- TypeMeta : metav1.TypeMeta {
28
- APIVersion : corev1 .SchemeGroupVersion .String (),
29
- Kind : "ConfigMap" ,
30
- },
31
- ObjectMeta : metav1.ObjectMeta {
32
- Name : "mnist-mcad" ,
33
- Namespace : namespace .Name ,
34
- },
35
- BinaryData : map [string ][]byte {
36
- // pip requirements
37
- "requirements.txt" : ReadFile (test , "mnist_pip_requirements.txt" ),
38
- // MNIST training script
39
- "mnist.py" : ReadFile (test , "mnist.py" ),
40
- },
41
- Immutable : Ptr (true ),
42
- }
43
-
44
- config , err := test .Client ().Core ().CoreV1 ().ConfigMaps (namespace .Name ).Create (test .Ctx (), config , metav1.CreateOptions {})
45
- test .Expect (err ).NotTo (HaveOccurred ())
46
- test .T ().Logf ("Created ConfigMap %s/%s successfully" , config .Namespace , config .Name )
22
+ config , err := TestConfig (test , namespace .Name )
23
+ test .Expect (err ).To (BeNil ())
47
24
48
25
//create OCM connection
49
- instascaleOCMSecret , err := test .Client ().Core ().CoreV1 ().Secrets ("default" ).Get (test .Ctx (), "instascale-ocm-secret" , metav1.GetOptions {})
50
- if err != nil {
51
- test .T ().Errorf ("unable to retrieve instascale-ocm-secret - Error : %v" , err )
52
- }
53
- test .Expect (err ).NotTo (HaveOccurred ())
54
- ocmToken := string (instascaleOCMSecret .Data ["token" ])
55
- test .T ().Logf ("Retrieved Secret %s successfully" , instascaleOCMSecret .Name )
26
+ connection , err := CreateConnection (test )
27
+ test .Expect (err ).To (BeNil ())
56
28
57
- connection , err := CreateOCMConnection (ocmToken )
58
- if err != nil {
59
- test .T ().Errorf ("Unable to create ocm connection - Error : %v" , err )
60
- }
61
29
defer connection .Close ()
62
30
63
31
// check existing cluster machine pool resources
@@ -66,120 +34,9 @@ func TestInstascaleMachinePool(t *testing.T) {
66
34
test .Expect (err ).NotTo (HaveOccurred ())
67
35
test .Expect (foundMachinePool ).To (BeFalse ())
68
36
69
- // Batch Job
70
- job := & batchv1.Job {
71
- TypeMeta : metav1.TypeMeta {
72
- APIVersion : batchv1 .SchemeGroupVersion .String (),
73
- Kind : "Job" ,
74
- },
75
- ObjectMeta : metav1.ObjectMeta {
76
- Name : "mnist" ,
77
- Namespace : namespace .Name ,
78
- },
79
- Spec : batchv1.JobSpec {
80
- Completions : Ptr (int32 (1 )),
81
- Parallelism : Ptr (int32 (1 )),
82
- Template : corev1.PodTemplateSpec {
83
- Spec : corev1.PodSpec {
84
- Containers : []corev1.Container {
85
- {
86
- Name : "job" ,
87
- Image : GetPyTorchImage (),
88
- Env : []corev1.EnvVar {
89
- corev1.EnvVar {Name : "PYTHONUSERBASE" , Value : "/test2" },
90
- },
91
- Command : []string {"/bin/sh" , "-c" , "pip install -r /test/requirements.txt && torchrun /test/mnist.py" },
92
- Args : []string {"$PYTHONUSERBASE" },
93
- VolumeMounts : []corev1.VolumeMount {
94
- {
95
- Name : "test" ,
96
- MountPath : "/test" ,
97
- },
98
- {
99
- Name : "test2" ,
100
- MountPath : "/test2" ,
101
- },
102
- },
103
- WorkingDir : "/test2" ,
104
- },
105
- },
106
- Volumes : []corev1.Volume {
107
- {
108
- Name : "test" ,
109
- VolumeSource : corev1.VolumeSource {
110
- ConfigMap : & corev1.ConfigMapVolumeSource {
111
- LocalObjectReference : corev1.LocalObjectReference {
112
- Name : config .Name ,
113
- },
114
- },
115
- },
116
- },
117
- {
118
- Name : "test2" ,
119
- VolumeSource : corev1.VolumeSource {
120
- EmptyDir : & corev1.EmptyDirVolumeSource {},
121
- },
122
- },
123
- },
124
- RestartPolicy : corev1 .RestartPolicyNever ,
125
- },
126
- },
127
- },
128
- }
129
-
130
- // create an appwrapper
131
- aw := & mcadv1beta1.AppWrapper {
132
- ObjectMeta : metav1.ObjectMeta {
133
- Name : "test-instascale" ,
134
- Namespace : namespace .Name ,
135
- Labels : map [string ]string {
136
- "orderedinstance" : "m5.xlarge_g4dn.xlarge" ,
137
- },
138
- },
139
- Spec : mcadv1beta1.AppWrapperSpec {
140
- AggrResources : mcadv1beta1.AppWrapperResourceList {
141
- GenericItems : []mcadv1beta1.AppWrapperGenericResource {
142
- {
143
- CustomPodResources : []mcadv1beta1.CustomPodResourceTemplate {
144
- {
145
- Replicas : 1 ,
146
- Requests : corev1.ResourceList {
147
- corev1 .ResourceCPU : resource .MustParse ("250m" ),
148
- corev1 .ResourceMemory : resource .MustParse ("512Mi" ),
149
- "nvidia.com/gpu" : resource .MustParse ("1" ),
150
- },
151
- Limits : corev1.ResourceList {
152
- corev1 .ResourceCPU : resource .MustParse ("500m" ),
153
- corev1 .ResourceMemory : resource .MustParse ("1G" ),
154
- "nvidia.com/gpu" : resource .MustParse ("1" ),
155
- },
156
- },
157
- {
158
- Replicas : 1 ,
159
- Requests : corev1.ResourceList {
160
- corev1 .ResourceCPU : resource .MustParse ("250m" ),
161
- corev1 .ResourceMemory : resource .MustParse ("512Mi" ),
162
- },
163
- Limits : corev1.ResourceList {
164
- corev1 .ResourceCPU : resource .MustParse ("500m" ),
165
- corev1 .ResourceMemory : resource .MustParse ("1G" ),
166
- },
167
- },
168
- },
169
- GenericTemplate : Raw (test , job ),
170
- CompletionStatus : "Complete" ,
171
- },
172
- },
173
- },
174
- },
175
- }
176
-
177
- _ , err = test .Client ().MCAD ().WorkloadV1beta1 ().AppWrappers (namespace .Name ).Create (test .Ctx (), aw , metav1.CreateOptions {})
178
- test .Expect (err ).NotTo (HaveOccurred ())
179
- test .T ().Logf ("AppWrapper created successfully %s/%s" , aw .Namespace , aw .Name )
180
-
181
- test .Eventually (AppWrapper (test , namespace , aw .Name ), TestTimeoutShort ).
182
- Should (WithTransform (AppWrapperState , Equal (mcadv1beta1 .AppWrapperStateActive )))
37
+ // Setup batch job and AppWrapper
38
+ job , aw , err := JobAppwrapperSetup (test , namespace , config )
39
+ test .Expect (err ).To (BeNil ())
183
40
184
41
// time.Sleep is used twice throughout the test, each for 30 seconds. Can look into using sync package waitGroup instead if that makes more sense
185
42
// wait for required resources to scale up before checking them again
0 commit comments