Skip to content

Commit 2955b0c

Browse files
committed
Review feedback
1 parent 941240f commit 2955b0c

File tree

2 files changed

+34
-26
lines changed

2 files changed

+34
-26
lines changed

demo-notebooks/batch-job/batch_mnist_mcad.ipynb

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"outputs": [],
99
"source": [
1010
"# Import pieces from codeflare-sdk\n",
11-
"from codeflare_sdk.cluster.auth import TokenAuthentication"
11+
"from codeflare_sdk.cluster.auth import TokenAuthentication\n",
12+
"from codeflare_sdk.job.jobs import DDPJobDefinition"
1213
]
1314
},
1415
{
@@ -45,16 +46,6 @@
4546
"Now that we are logged in, we can directly submit our batch job (model training on a single worker with one CPU and no GPUs, as configured in the job definition below) to MCAD via TorchX."
4647
]
4748
},
48-
{
49-
"cell_type": "code",
50-
"execution_count": 5,
51-
"id": "3cc6183a-8f6e-4347-af91-d088ed422544",
52-
"metadata": {},
53-
"outputs": [],
54-
"source": [
55-
"from codeflare_sdk.job.jobs import DDPJobDefinition"
56-
]
57-
},
5849
{
5950
"cell_type": "code",
6051
"execution_count": 6,
@@ -70,7 +61,17 @@
7061
}
7162
],
7263
"source": [
73-
"job = DDPJobDefinition(name=\"mnistjob\", script=\"mnist.py\", scheduler_args={\"namespace\": \"default\"}, j=\"1x1\", gpu=0, cpu=1, memMB=8000, image=\"quay.io/michaelclifford/mnist-test:latest\").submit()"
64+
"job = DDPJobDefinition(\n",
65+
" name=\"mnistjob\",\n",
66+
" script=\"mnist.py\",\n",
67+
" scheduler_args={\"namespace\": \"default\"},\n",
68+
" j=\"1x1\",\n",
69+
" gpu=0,\n",
70+
" cpu=1,\n",
71+
" memMB=8000,\n",
72+
" image=\"quay.io/michaelclifford/mnist-test:latest\"\n",
73+
")\n",
74+
"job.submit()"
7475
]
7576
},
7677
{

demo-notebooks/batch-job/batch_mnist_ray.ipynb

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
"source": [
1010
"# Import pieces from codeflare-sdk\n",
1111
"from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration\n",
12-
"from codeflare_sdk.cluster.auth import TokenAuthentication"
12+
"from codeflare_sdk.cluster.auth import TokenAuthentication\n",
13+
"from codeflare_sdk.job.jobs import DDPJobDefinition"
1314
]
1415
},
1516
{
@@ -51,8 +52,20 @@
5152
}
5253
],
5354
"source": [
54-
"# Create our cluster and submit appwrapper\n",
55-
"cluster = Cluster(ClusterConfiguration(name='mnisttest', min_worker=2, max_worker=2, min_cpus=8, max_cpus=8, min_memory=16, max_memory=16, gpu=4, instascale=True, machine_types=[\"m5.xlarge\", \"p3.8xlarge\"]))"
55+
"# Create our cluster and submit appwrapper (reduce specs as desired)\n",
56+
"cluster = Cluster(ClusterConfiguration(\n",
57+
" name='mnisttest',\n",
58+
" namespace='default',\n",
59+
" min_worker=2,\n",
60+
" max_worker=2,\n",
61+
" min_cpus=8,\n",
62+
" max_cpus=8,\n",
63+
" min_memory=16,\n",
64+
" max_memory=16,\n",
65+
" gpu=4,\n",
66+
" instascale=True, # Can be set to False if scaling is not needed\n",
67+
" machine_types=[\"m5.xlarge\", \"g4dn.xlarge\"] # Can be removed if instascale is False\n",
68+
"))"
5669
]
5770
},
5871
{
@@ -293,16 +306,6 @@
293306
"Now that our resource cluster is ready, we can directly submit our batch job (model training on two workers with four GPUs each) to the cluster via TorchX."
294307
]
295308
},
296-
{
297-
"cell_type": "code",
298-
"execution_count": 9,
299-
"id": "3cc6183a-8f6e-4347-af91-d088ed422544",
300-
"metadata": {},
301-
"outputs": [],
302-
"source": [
303-
"from codeflare_sdk.job.jobs import DDPJobDefinition"
304-
]
305-
},
306309
{
307310
"cell_type": "code",
308311
"execution_count": 10,
@@ -320,7 +323,11 @@
320323
}
321324
],
322325
"source": [
323-
"job = DDPJobDefinition(script=\"mnist.py\", scheduler_args={\"requirements\": \"requirements.txt\"}).submit(cluster)"
326+
"job = DDPJobDefinition(\n",
327+
" script=\"mnist.py\",\n",
328+
" scheduler_args={\"requirements\": \"requirements.txt\"}\n",
329+
")\n",
330+
"job.submit(cluster)"
324331
]
325332
},
326333
{

0 commit comments

Comments
 (0)