Skip to content

Commit f87ef1d

Browse files
XciD and sayakpaul
authored and committed
[CI] Update runner configuration for setup and nightly tests (#9005)
* [CI] Update runner configuration for setup and nightly tests

  Signed-off-by: Adrien <adrien@huggingface.co>

* fix group

  Signed-off-by: Adrien <adrien@huggingface.co>

* update for t4

  Signed-off-by: Adrien <adrien@huggingface.co>

---------

Signed-off-by: Adrien <adrien@huggingface.co>
1 parent edddf3d commit f87ef1d

10 files changed

+58
-35
lines changed

.github/workflows/build_docker_images.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ env:
2020

2121
jobs:
2222
test-build-docker-images:
23-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
23+
runs-on:
24+
group: aws-general-8-plus
2425
if: github.event_name == 'pull_request'
2526
steps:
2627
- name: Set up Docker Buildx
@@ -50,7 +51,8 @@ jobs:
5051
if: steps.file_changes.outputs.all != ''
5152

5253
build-and-push-docker-images:
53-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
54+
runs-on:
55+
group: aws-general-8-plus
5456
if: github.event_name != 'pull_request'
5557

5658
permissions:
@@ -98,4 +100,4 @@ jobs:
98100
slack_channel: ${{ env.CI_SLACK_CHANNEL }}
99101
title: "🤗 Results of the ${{ matrix.image-name }} Docker Image build"
100102
status: ${{ job.status }}
101-
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
103+
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

.github/workflows/nightly_tests.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ env:
1919
jobs:
2020
setup_torch_cuda_pipeline_matrix:
2121
name: Setup Torch Pipelines CUDA Slow Tests Matrix
22-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
22+
runs-on:
23+
group: aws-general-8-plus
2324
container:
2425
image: diffusers/diffusers-pytorch-cpu
2526
outputs:
@@ -55,7 +56,8 @@ jobs:
5556
max-parallel: 8
5657
matrix:
5758
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
58-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
59+
runs-on:
60+
group: aws-g4dn-2xlarge
5961
container:
6062
image: diffusers/diffusers-pytorch-cuda
6163
options: --shm-size "16gb" --ipc host --gpus 0
@@ -105,7 +107,8 @@ jobs:
105107
106108
run_nightly_tests_for_other_torch_modules:
107109
name: Nightly Torch CUDA Tests
108-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
110+
runs-on:
111+
group: aws-g4dn-2xlarge
109112
container:
110113
image: diffusers/diffusers-pytorch-cuda
111114
options: --shm-size "16gb" --ipc host --gpus 0
@@ -234,7 +237,8 @@ jobs:
234237
235238
run_nightly_onnx_tests:
236239
name: Nightly ONNXRuntime CUDA tests on Ubuntu
237-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
240+
runs-on:
241+
group: aws-g4dn-2xlarge
238242
container:
239243
image: diffusers/diffusers-onnxruntime-cuda
240244
options: --gpus 0 --shm-size "16gb" --ipc host

.github/workflows/pr_test_fetcher.yml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ concurrency:
1515
jobs:
1616
setup_pr_tests:
1717
name: Setup PR Tests
18-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
18+
runs-on:
19+
group: aws-general-8-plus
1920
container:
2021
image: diffusers/diffusers-pytorch-cpu
2122
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
@@ -73,7 +74,8 @@ jobs:
7374
max-parallel: 2
7475
matrix:
7576
modules: ${{ fromJson(needs.setup_pr_tests.outputs.matrix) }}
76-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
77+
runs-on:
78+
group: aws-general-8-plus
7779
container:
7880
image: diffusers/diffusers-pytorch-cpu
7981
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/
@@ -123,12 +125,13 @@ jobs:
123125
config:
124126
- name: Hub tests for models, schedulers, and pipelines
125127
framework: hub_tests_pytorch
126-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
128+
runner: aws-general-8-plus
127129
image: diffusers/diffusers-pytorch-cpu
128130
report: torch_hub
129131

130132
name: ${{ matrix.config.name }}
131-
runs-on: ${{ matrix.config.runner }}
133+
runs-on:
134+
group: ${{ matrix.config.runner }}
132135
container:
133136
image: ${{ matrix.config.image }}
134137
options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/

.github/workflows/pr_test_peft_backend.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ jobs:
7171

7272
name: LoRA - ${{ matrix.lib-versions }}
7373

74-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
74+
runs-on:
75+
group: aws-general-8-plus
7576

7677
container:
7778
image: diffusers/diffusers-pytorch-cpu
@@ -128,4 +129,4 @@ jobs:
128129
uses: actions/upload-artifact@v2
129130
with:
130131
name: pr_${{ matrix.config.report }}_test_reports
131-
path: reports
132+
path: reports

.github/workflows/pr_tests.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,28 +77,29 @@ jobs:
7777
config:
7878
- name: Fast PyTorch Pipeline CPU tests
7979
framework: pytorch_pipelines
80-
runner: [ self-hosted, intel-cpu, 32-cpu, 256-ram, ci ]
80+
runner: aws-highmemory-32-plus
8181
image: diffusers/diffusers-pytorch-cpu
8282
report: torch_cpu_pipelines
8383
- name: Fast PyTorch Models & Schedulers CPU tests
8484
framework: pytorch_models
85-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
85+
runner: aws-general-8-plus
8686
image: diffusers/diffusers-pytorch-cpu
8787
report: torch_cpu_models_schedulers
8888
- name: Fast Flax CPU tests
8989
framework: flax
90-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
90+
runner: aws-general-8-plus
9191
image: diffusers/diffusers-flax-cpu
9292
report: flax_cpu
9393
- name: PyTorch Example CPU tests
9494
framework: pytorch_examples
95-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
95+
runner: aws-general-8-plus
9696
image: diffusers/diffusers-pytorch-cpu
9797
report: torch_example_cpu
9898

9999
name: ${{ matrix.config.name }}
100100

101-
runs-on: ${{ matrix.config.runner }}
101+
runs-on:
102+
group: ${{ matrix.config.runner }}
102103

103104
container:
104105
image: ${{ matrix.config.image }}
@@ -180,7 +181,8 @@ jobs:
180181
config:
181182
- name: Hub tests for models, schedulers, and pipelines
182183
framework: hub_tests_pytorch
183-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
184+
runner:
185+
group: aws-general-8-plus
184186
image: diffusers/diffusers-pytorch-cpu
185187
report: torch_hub
186188

.github/workflows/push_tests.yml

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ env:
1919
jobs:
2020
setup_torch_cuda_pipeline_matrix:
2121
name: Setup Torch Pipelines CUDA Slow Tests Matrix
22-
runs-on: [ self-hosted, intel-cpu, 8-cpu, ci ]
22+
runs-on:
23+
group: aws-general-8-plus
2324
container:
2425
image: diffusers/diffusers-pytorch-cpu
2526
outputs:
@@ -57,7 +58,8 @@ jobs:
5758
max-parallel: 8
5859
matrix:
5960
module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
60-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
61+
runs-on:
62+
group: aws-g4dn-2xlarge
6163
container:
6264
image: diffusers/diffusers-pytorch-cuda
6365
options: --shm-size "16gb" --ipc host --gpus 0
@@ -101,7 +103,8 @@ jobs:
101103

102104
torch_cuda_tests:
103105
name: Torch CUDA Tests
104-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
106+
runs-on:
107+
group: aws-g4dn-2xlarge
105108
container:
106109
image: diffusers/diffusers-pytorch-cuda
107110
options: --shm-size "16gb" --ipc host --gpus 0
@@ -201,7 +204,8 @@ jobs:
201204

202205
onnx_cuda_tests:
203206
name: ONNX CUDA Tests
204-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
207+
runs-on:
208+
group: aws-g4dn-2xlarge
205209
container:
206210
image: diffusers/diffusers-onnxruntime-cuda
207211
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ --gpus 0
@@ -249,7 +253,8 @@ jobs:
249253
run_torch_compile_tests:
250254
name: PyTorch Compile CUDA tests
251255

252-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
256+
runs-on:
257+
group: aws-g4dn-2xlarge
253258

254259
container:
255260
image: diffusers/diffusers-pytorch-compile-cuda
@@ -291,7 +296,8 @@ jobs:
291296
run_xformers_tests:
292297
name: PyTorch xformers CUDA tests
293298

294-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
299+
runs-on:
300+
group: aws-g4dn-2xlarge
295301

296302
container:
297303
image: diffusers/diffusers-pytorch-xformers-cuda
@@ -332,7 +338,8 @@ jobs:
332338
run_examples_tests:
333339
name: Examples PyTorch CUDA tests on Ubuntu
334340

335-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
341+
runs-on:
342+
group: aws-g4dn-2xlarge
336343

337344
container:
338345
image: diffusers/diffusers-pytorch-cuda

.github/workflows/push_tests_fast.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,28 +29,29 @@ jobs:
2929
config:
3030
- name: Fast PyTorch CPU tests on Ubuntu
3131
framework: pytorch
32-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
32+
runner: aws-general-8-plus
3333
image: diffusers/diffusers-pytorch-cpu
3434
report: torch_cpu
3535
- name: Fast Flax CPU tests on Ubuntu
3636
framework: flax
37-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
37+
runner: aws-general-8-plus
3838
image: diffusers/diffusers-flax-cpu
3939
report: flax_cpu
4040
- name: Fast ONNXRuntime CPU tests on Ubuntu
4141
framework: onnxruntime
42-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
42+
runner: aws-general-8-plus
4343
image: diffusers/diffusers-onnxruntime-cpu
4444
report: onnx_cpu
4545
- name: PyTorch Example CPU tests on Ubuntu
4646
framework: pytorch_examples
47-
runner: [ self-hosted, intel-cpu, 8-cpu, ci ]
47+
runner: aws-general-8-plus
4848
image: diffusers/diffusers-pytorch-cpu
4949
report: torch_example_cpu
5050

5151
name: ${{ matrix.config.name }}
5252

53-
runs-on: ${{ matrix.config.runner }}
53+
runs-on:
54+
group: ${{ matrix.config.runner }}
5455

5556
container:
5657
image: ${{ matrix.config.image }}

.github/workflows/run_tests_from_a_pr.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ env:
2626
jobs:
2727
run_tests:
2828
name: "Run a test on our runner from a PR"
29-
runs-on: [single-gpu, nvidia-gpu, t4, ci]
29+
runs-on:
30+
group: aws-g4dn-2xlarge
3031
container:
3132
image: ${{ github.event.inputs.docker_image }}
3233
options: --gpus 0 --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -70,4 +71,4 @@ jobs:
7071
env:
7172
PY_TEST: ${{ github.event.inputs.test }}
7273
run: |
73-
pytest "$PY_TEST"
74+
pytest "$PY_TEST"

.github/workflows/ssh-pr-runner.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ env:
1919
jobs:
2020
ssh_runner:
2121
name: "SSH"
22-
runs-on: [self-hosted, intel-cpu, 32-cpu, 256-ram, ci]
22+
runs-on:
23+
group: aws-highmemory-32-plus
2324
container:
2425
image: ${{ github.event.inputs.docker_image }}
2526
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --privileged

.github/workflows/ssh-runner.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ env:
2222
jobs:
2323
ssh_runner:
2424
name: "SSH"
25-
runs-on: [single-gpu, nvidia-gpu, "${{ github.event.inputs.runner_type }}", ci]
25+
runs-on:
26+
group: "${{ github.event.inputs.runner_type }}"
2627
container:
2728
image: ${{ github.event.inputs.docker_image }}
2829
options: --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface/diffusers:/mnt/cache/ --gpus 0 --privileged

0 commit comments

Comments (0)