diff --git a/.circleci/config.yml b/.circleci/config.yml
index b3f3eef23ca..d7a3513a9e4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -190,6 +190,7 @@ jobs:
     resource_class: gpu.nvidia.small.multi
   pytorch_tutorial_pr_build_worker_1:
     <<: *pytorch_tutorial_build_worker_defaults
+    resource_class: gpu.nvidia.medium
   pytorch_tutorial_pr_build_worker_10:
     <<: *pytorch_tutorial_build_worker_defaults
   pytorch_tutorial_pr_build_worker_11:
@@ -234,6 +235,7 @@ jobs:
     resource_class: gpu.nvidia.small.multi
   pytorch_tutorial_trunk_build_worker_1:
     <<: *pytorch_tutorial_build_worker_defaults
+    resource_class: gpu.nvidia.medium
   pytorch_tutorial_trunk_build_worker_10:
     <<: *pytorch_tutorial_build_worker_defaults
   pytorch_tutorial_trunk_build_worker_11:
diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py
index ba816ca4912..42da5c34f52 100644
--- a/.circleci/regenerate.py
+++ b/.circleci/regenerate.py
@@ -26,9 +26,11 @@ def indent(indentation, data_list):
 def jobs(pr_or_trunk, num_workers=20, indentation=2):
     jobs = {}
 
-    # all tutorials that need gpu.nvidia.small.multi machines will be routed
-    # by get_files_to_run.py to 0th worker
+    # all tutorials that need gpu.nvidia.small.multi machines will be routed by
+    # get_files_to_run.py to 0th worker, similarly for gpu.nvidia.medium and the
+    # 1st worker
     needs_gpu_nvidia_small_multi = [0]
+    needs_gpu_nvidia_medium = [1]
     jobs[f"pytorch_tutorial_{pr_or_trunk}_build_manager"] = {
         "<<": "*pytorch_tutorial_build_manager_defaults"
     }
@@ -36,6 +38,8 @@ def jobs(pr_or_trunk, num_workers=20, indentation=2):
         job_info = {"<<": "*pytorch_tutorial_build_worker_defaults"}
         if i in needs_gpu_nvidia_small_multi:
             job_info["resource_class"] = "gpu.nvidia.small.multi"
+        if i in needs_gpu_nvidia_medium:
+            job_info["resource_class"] = "gpu.nvidia.medium"
         jobs[f"pytorch_tutorial_{pr_or_trunk}_build_worker_{i}"] = job_info
 
     return indent(indentation, jobs).replace("'", "")
diff --git a/.jenkins/get_files_to_run.py b/.jenkins/get_files_to_run.py
index 91e658a0251..fc5d4310ac7 100644
--- a/.jenkins/get_files_to_run.py
+++ b/.jenkins/get_files_to_run.py
@@ -39,15 +39,23 @@ def add_to_shard(i, filename):
             shard_jobs,
         )
 
+    all_other_files = all_files.copy()
     needs_gpu_nvidia_small_multi = list(
         filter(lambda x: get_needs_machine(x) == "gpu.nvidia.small.multi", all_files,)
     )
+    needs_gpu_nvidia_medium = list(
+        filter(lambda x: get_needs_machine(x) == "gpu.nvidia.medium", all_files,)
+    )
     for filename in needs_gpu_nvidia_small_multi:
         # currently, the only job that uses gpu.nvidia.small.multi is the 0th worker,
         # so we'll add all the jobs that need this machine to the 0th worker
         add_to_shard(0, filename)
-
-    all_other_files = [x for x in all_files if x not in needs_gpu_nvidia_small_multi]
+        all_other_files.remove(filename)
+    for filename in needs_gpu_nvidia_medium:
+        # currently, the only job that uses gpu.nvidia.medium is the 1st worker,
+        # so we'll add all the jobs that need this machine to the 1st worker
+        add_to_shard(1, filename)
+        all_other_files.remove(filename)
 
     sorted_files = sorted(all_other_files, key=get_duration, reverse=True,)
 
diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json
index 095e2a65ccb..9a881ccfa88 100644
--- a/.jenkins/metadata.json
+++ b/.jenkins/metadata.json
@@ -24,5 +24,8 @@
   },
   "intermediate_source/model_parallel_tutorial.py": {
     "needs": "gpu.nvidia.small.multi"
+  },
+  "intermediate_source/torch_compile_tutorial.py": {
+    "needs": "gpu.nvidia.medium"
   }
 }
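
Note on routing: the sharding logic above relies on get_needs_machine() returning the "needs" value recorded in .jenkins/metadata.json, which is what sends intermediate_source/torch_compile_tutorial.py to the gpu.nvidia.medium worker. The snippet below is only a rough sketch of how such a lookup could work (it is not the actual helper in .jenkins/get_files_to_run.py, and it assumes the script runs from the repository root):

    import json

    # Sketch only: load the per-tutorial metadata shown in the diff above.
    with open(".jenkins/metadata.json") as f:
        METADATA = json.load(f)

    def get_needs_machine(filename):
        # e.g. returns "gpu.nvidia.medium" for
        # intermediate_source/torch_compile_tutorial.py, or None when the
        # tutorial has no special resource_class requirement.
        return METADATA.get(filename, {}).get("needs", None)

With a lookup like that, get_files_to_run.py only has to filter all_files on the returned value and pin the matching tutorials to worker 0 (gpu.nvidia.small.multi) or worker 1 (gpu.nvidia.medium), as the diff does.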