From 0ba3e39d736a1f5d46da5728e564569f8cd3fe8c Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 16 Jun 2023 13:45:58 -0700 Subject: [PATCH] [CI] Spawn docker container with 2GB shmem This should prevent crashes during NCCL initialization. If `data_parallel_tutorial.py` is executed without this option, it segfaults in `ncclShmOpen` while executing `nn.DataParallel(model)`. --- .github/workflows/build-tutorials.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-tutorials.yml b/.github/workflows/build-tutorials.yml index c242a1897c6..dafa71bdd2d 100644 --- a/.github/workflows/build-tutorials.yml +++ b/.github/workflows/build-tutorials.yml @@ -87,6 +87,7 @@ jobs: --tty \ --detach \ --user jenkins \ + --shm-size=2gb \ --name="${container_name}" \ -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \ -w /var/lib/jenkins/workspace \