Skip to content

Commit f62199b

Browse files
authored
enable AMD GPU (#406)
* [WIP] enable AMD GPU Signed-off-by: Vicky Tsang <vtsang@amd.com>
* check for AMD GPU device and rocm installation with rocminfo Signed-off-by: Vicky Tsang <vtsang@amd.com>
* update docs/tutorials with AMD GPU/rocm references Signed-off-by: Vicky Tsang <vtsang@amd.com>
* remove rocm dependency in packager Signed-off-by: Vicky Tsang <vtsang@amd.com>
---------
Signed-off-by: Vicky Tsang <vtsang@amd.com>
1 parent 57b0e9d commit f62199b

File tree

7 files changed

+73
-9
lines changed

7 files changed

+73
-9
lines changed

docs/source/getting_started/tutorials/mednist_app.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ monai-deploy package examples/apps/mednist_classifier_monaideploy/mednist_classi
8888
--model classifier.zip \
8989
-l DEBUG
9090

91+
# For AMD GPUs, nvidia-docker is not required. Use --base [base image] option to override the docker base image.
92+
# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
93+
monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 \
94+
examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py \
95+
--tag mednist_app:latest \
96+
--model classifier.zip \
97+
-l DEBUG
98+
9199
# Run the app with docker image and input file locally
92100
monai-deploy run mednist_app:latest input output
93101
cat output/output.json

docs/source/getting_started/tutorials/monai_bundle_app.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ monai-deploy exec ../examples/apps/ai_spleen_seg_app/app.py -i dcm/ -o output -m
6666
# Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
6767
monai-deploy package examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
6868

69+
# For AMD GPUs, nvidia-docker is not required. Use --base [base image] option to override the docker base image.
70+
# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
71+
monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
72+
6973
# Run the app with docker image and input file locally
7074
monai-deploy run seg_app:latest dcm/ output
7175
```

docs/source/getting_started/tutorials/multi_model_app.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ monai-deploy exec ../examples/apps/examples/apps/ai_multi_ai_app/app.py -i dcm/
6666
# Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
6767
monai-deploy package -b nvcr.io/nvidia/pytorch:22.08-py3 examples/apps/ai_multi_ai_app --tag multi_model_app:latest --model multi_models -l DEBUG
6868

69+
# For AMD GPUs, nvidia-docker is not required. Use --base [base image] option to override the docker base image.
70+
# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
71+
monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_multi_ai_app --tag multi_model_app:latest --model multi_models -l DEBUG
72+
6973
# Run the app with docker image and input file locally
7074
monai-deploy run multi_model_app:latest dcm/ output
7175
```

docs/source/getting_started/tutorials/segmentation_app.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ python examples/apps/ai_spleen_seg_app/app.py -i dcm/ -o output -m model.ts
7676
# Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
7777
monai-deploy package examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
7878

79+
# For AMD GPUs, nvidia-docker is not required. Use --base [base image] option to override the docker base image.
80+
# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
81+
monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
82+
7983
# Run the app with docker image and input file locally
8084
monai-deploy run seg_app:latest dcm/ output
8185
```

monai/deploy/packager/util.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,10 @@ def verify_base_image(base_image: str) -> str:
4242
str: returns string identifier of the dockerfile template to build MAP
4343
if valid base image provided, returns empty string otherwise
4444
"""
45-
valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
45+
if "rocm" in base_image:
46+
valid_prefixes = {"rocm/pytorch": "ubuntu"}
47+
else:
48+
valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
4649

4750
for prefix, template in valid_prefixes.items():
4851
if prefix in base_image:
@@ -91,10 +94,12 @@ def initialize_args(args: Namespace) -> Dict:
9194
if not dockerfile_type:
9295
logger.error(
9396
"Provided base image '{}' is not supported \n \
94-
Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
97+
Please provide a ROCm or Cuda based Pytorch image from \n \
98+
https://hub.docker.com/r/rocm/pytorch or https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
9599
args.base
96100
)
97101
)
102+
98103
sys.exit(1)
99104

100105
processed_args["dockerfile_type"] = dockerfile_type if args.base else DefaultValues.DOCKERFILE_TYPE

monai/deploy/runner/runner.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from typing import Tuple
2121

2222
from monai.deploy.runner.utils import get_requested_gpus, run_cmd, verify_image
23+
from monai.deploy.utils.deviceutil import has_rocm
2324

2425
logger = logging.getLogger("app_runner")
2526

@@ -87,7 +88,8 @@ def run_app(map_name: str, input_path: Path, output_path: Path, app_info: dict,
8788
# Use nvidia-docker if GPU resources are requested
8889
requested_gpus = get_requested_gpus(pkg_info)
8990
if requested_gpus > 0:
90-
cmd = "nvidia-docker run --rm -a STDERR"
91+
if not has_rocm():
92+
cmd = "nvidia-docker run --rm -a STDERR"
9193

9294
if not quiet:
9395
cmd += " -a STDOUT"
@@ -160,12 +162,13 @@ def pkg_specific_dependency_verification(pkg_info: dict) -> bool:
160162
"""
161163
requested_gpus = get_requested_gpus(pkg_info)
162164
if requested_gpus > 0:
163-
# check for nvidia-docker
164-
prog = "nvidia-docker"
165-
logger.info('--> Verifying if "%s" is installed...\n', prog)
166-
if not shutil.which(prog):
167-
logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
168-
return False
165+
if not has_rocm():
166+
# check for nvidia-docker
167+
prog = "nvidia-docker"
168+
logger.info('--> Verifying if "%s" is installed...\n', prog)
169+
if not shutil.which(prog):
170+
logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
171+
return False
169172

170173
return True
171174

monai/deploy/utils/deviceutil.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2023 MONAI Consortium
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
# Unless required by applicable law or agreed to in writing, software
7+
# distributed under the License is distributed on an "AS IS" BASIS,
8+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
# See the License for the specific language governing permissions and
10+
# limitations under the License.
11+
12+
import subprocess
13+
14+
15+
def has_rocm():
16+
"""Return True if ROCm is installed and GPU device is detected.
17+
18+
Args:
19+
20+
Returns:
21+
True if ROCm is installed and GPU device is detected, otherwise False.
22+
"""
23+
cmd = "rocminfo"
24+
try:
25+
process = subprocess.run([cmd], stdout=subprocess.PIPE)
26+
for line_in in process.stdout.decode().splitlines():
27+
if "Device Type" in line_in and "GPU" in line_in:
28+
return True
29+
except Exception:
30+
pass
31+
32+
return False
33+
34+
35+
if __name__ == "__main__":
36+
print(has_rocm())

0 commit comments

Comments
 (0)