From 1c7827d3942a7979a0b429a24523ed31fd52563a Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Tue, 28 Feb 2023 09:58:05 -0800 Subject: [PATCH 1/3] make 1st worker medium --- .circleci/config.yml | 2 ++ .circleci/regenerate.py | 8 ++++++-- .jenkins/get_files_to_run.py | 12 ++++++++++-- .jenkins/metadata.json | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index b3f3eef23ca..d7a3513a9e4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -190,6 +190,7 @@ jobs: resource_class: gpu.nvidia.small.multi pytorch_tutorial_pr_build_worker_1: <<: *pytorch_tutorial_build_worker_defaults + resource_class: gpu.nvidia.medium pytorch_tutorial_pr_build_worker_10: <<: *pytorch_tutorial_build_worker_defaults pytorch_tutorial_pr_build_worker_11: @@ -234,6 +235,7 @@ jobs: resource_class: gpu.nvidia.small.multi pytorch_tutorial_trunk_build_worker_1: <<: *pytorch_tutorial_build_worker_defaults + resource_class: gpu.nvidia.medium pytorch_tutorial_trunk_build_worker_10: <<: *pytorch_tutorial_build_worker_defaults pytorch_tutorial_trunk_build_worker_11: diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index ba816ca4912..42da5c34f52 100644 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -26,9 +26,11 @@ def indent(indentation, data_list): def jobs(pr_or_trunk, num_workers=20, indentation=2): jobs = {} - # all tutorials that need gpu.nvidia.small.multi machines will be routed - # by get_files_to_run.py to 0th worker + # all tutorials that need gpu.nvidia.small.multi machines will be routed by + # get_files_to_run.py to 0th worker, similarly for gpu.nvidia.medium and the + # 1st worker needs_gpu_nvidia_small_multi = [0] + needs_gpu_nvidia_medium = [1] jobs[f"pytorch_tutorial_{pr_or_trunk}_build_manager"] = { "<<": "*pytorch_tutorial_build_manager_defaults" } @@ -36,6 +38,8 @@ def jobs(pr_or_trunk, num_workers=20, indentation=2): job_info = {"<<": "*pytorch_tutorial_build_worker_defaults"} if i in needs_gpu_nvidia_small_multi: job_info["resource_class"] = "gpu.nvidia.small.multi" + if i in needs_gpu_nvidia_medium: + job_info["resource_class"] = "gpu.nvidia.medium" jobs[f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}"] = job_info return indent(indentation, jobs).replace("'", "") diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py index 91e658a0251..fc5d4310ac7 100644 --- a/.jenkins/get_files_to_run.py +++ b/.jenkins/get_files_to_run.py @@ -39,15 +39,23 @@ def add_to_shard(i, filename): shard_jobs, ) + all_other_files = all_files.copy() needs_gpu_nvidia_small_multi = list( filter(lambda x: get_needs_machine(x) == "gpu.nvidia.small.multi", all_files,) ) + needs_gpu_nvidia_medium = list( + filter(lambda x: get_needs_machine(x) == "gpu.nvidia.medium", all_files,) + ) for filename in needs_gpu_nvidia_small_multi: # currently, the only job that uses gpu.nvidia.small.multi is the 0th worker, # so we'll add all the jobs that need this machine to the 0th worker add_to_shard(0, filename) - - all_other_files = [x for x in all_files if x not in needs_gpu_nvidia_small_multi] + all_other_files.remove(filename) + for filename in needs_gpu_nvidia_medium: + # currently, the only job that uses gpu.nvidia.medium is the 1st worker, + # so we'll add all the jobs that need this machine to the 1st worker + add_to_shard(1, filename) + all_other_files.remove(filename) sorted_files = sorted(all_other_files, key=get_duration, reverse=True,) diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json index 095e2a65ccb..ec2108eb86b 100644 --- a/.jenkins/metadata.json +++ b/.jenkins/metadata.json @@ -24,5 +24,8 @@ }, "intermediate_source/model_parallel_tutorial.py": { "needs": "gpu.nvidia.small.multi" + }, + "torch_compile_tutorial_.py": { + "needs": "gpu.nvidia.medium" } } From 33b6d78417b55133828fe11c50742b4bbd1e5f0a Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Tue, 28 Feb 2023 11:31:13 -0800 Subject: [PATCH 2/3] typo --- .jenkins/metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json index ec2108eb86b..5aafced3383 100644 --- a/.jenkins/metadata.json +++ b/.jenkins/metadata.json @@ -25,7 +25,7 @@ "intermediate_source/model_parallel_tutorial.py": { "needs": "gpu.nvidia.small.multi" }, - "torch_compile_tutorial_.py": { + "intermediate_source/torch_compile_tutorial_.py": { "needs": "gpu.nvidia.medium" } } From 98989beefee22c8a6d99c6300b002e5346693ce0 Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Tue, 28 Feb 2023 14:04:25 -0800 Subject: [PATCH 3/3] use future file name --- .jenkins/metadata.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json index 5aafced3383..9a881ccfa88 100644 --- a/.jenkins/metadata.json +++ b/.jenkins/metadata.json @@ -25,7 +25,7 @@ "intermediate_source/model_parallel_tutorial.py": { "needs": "gpu.nvidia.small.multi" }, - "intermediate_source/torch_compile_tutorial_.py": { + "intermediate_source/torch_compile_tutorial.py": { "needs": "gpu.nvidia.medium" } }