From f8abbf7d9bdd5a7d2962df29d76548bc3fc76d0f Mon Sep 17 00:00:00 2001
From: Vicky Tsang <vtsang@amd.com>
Date: Mon, 13 Mar 2023 11:50:56 -0700
Subject: [PATCH 1/4] [WIP] enable AMD GPU

Signed-off-by: Vicky Tsang <vtsang@amd.com>
---
 monai/deploy/packager/util.py | 26 ++++++++++++++++++++------
 monai/deploy/runner/runner.py |  6 +++++-
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/monai/deploy/packager/util.py b/monai/deploy/packager/util.py
index 3db72128..ffce41c2 100644
--- a/monai/deploy/packager/util.py
+++ b/monai/deploy/packager/util.py
@@ -42,7 +42,11 @@ def verify_base_image(base_image: str) -> str:
         str: returns string identifier of the dockerfile template to build MAP
         if valid base image provided, returns empty string otherwise
     """
-    valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
+    import torch
+    if "AMD" not in torch.cuda.get_device_name(0):
+        valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
+    else:
+        valid_prefixes = {"rocm": "ubuntu", "rocm/pytorch": "pytorch"}
 
     for prefix, template in valid_prefixes.items():
         if prefix in base_image:
@@ -89,12 +93,22 @@ def initialize_args(args: Namespace) -> Dict:
     if args.base:
         dockerfile_type = verify_base_image(args.base)
         if not dockerfile_type:
-            logger.error(
-                "Provided base image '{}' is not supported \n \
-                          Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
-                    args.base
+            import torch
+            if "AMD" not in torch.cuda.get_device_name(0):
+                logger.error(
+                    "Provided base image '{}' is not supported \n \
+                            Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
+                        args.base
+                    )
                 )
-            )
+            else:
+                logger.error(
+                    "Provided base image '{}' is not supported \n \
+                            Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
+                        args.base
+                    )
+                )
+
             sys.exit(1)
 
     processed_args["dockerfile_type"] = dockerfile_type if args.base else DefaultValues.DOCKERFILE_TYPE
diff --git a/monai/deploy/runner/runner.py b/monai/deploy/runner/runner.py
index ee3e183b..9a145e48 100644
--- a/monai/deploy/runner/runner.py
+++ b/monai/deploy/runner/runner.py
@@ -87,7 +87,9 @@ def run_app(map_name: str, input_path: Path, output_path: Path, app_info: dict,
     # Use nvidia-docker if GPU resources are requested
     requested_gpus = get_requested_gpus(pkg_info)
     if requested_gpus > 0:
-        cmd = "nvidia-docker run --rm -a STDERR"
+        import torch
+        if "AMD" not in torch.cuda.get_device_name(0):
+           cmd = "nvidia-docker run --rm -a STDERR"
 
     if not quiet:
         cmd += " -a STDOUT"
@@ -160,6 +162,8 @@ def pkg_specific_dependency_verification(pkg_info: dict) -> bool:
     """
     requested_gpus = get_requested_gpus(pkg_info)
     if requested_gpus > 0:
+        import torch
+        if "AMD" not in torch.cuda.get_device_name(0):
         # check for nvidia-docker
         prog = "nvidia-docker"
         logger.info('--> Verifying if "%s" is installed...\n', prog)

From aaf14cccd487b4d54e782e8ce74d8c5999dddd91 Mon Sep 17 00:00:00 2001
From: Vicky Tsang <vtsang@amd.com>
Date: Fri, 24 Mar 2023 12:31:05 -0700
Subject: [PATCH 2/4] check for AMD GPU device and rocm installation with
 rocminfo

Signed-off-by: Vicky Tsang <vtsang@amd.com>
---
 monai/deploy/packager/util.py    | 15 +++++++--------
 monai/deploy/runner/runner.py    | 21 ++++++++++----------
 monai/deploy/utils/deviceutil.py | 33 ++++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 19 deletions(-)
 create mode 100644 monai/deploy/utils/deviceutil.py

diff --git a/monai/deploy/packager/util.py b/monai/deploy/packager/util.py
index ffce41c2..6506895e 100644
--- a/monai/deploy/packager/util.py
+++ b/monai/deploy/packager/util.py
@@ -23,6 +23,7 @@
 from monai.deploy.exceptions import WrongValueError
 from monai.deploy.packager.constants import DefaultValues
 from monai.deploy.packager.templates import Template
+from monai.deploy.utils.deviceutil import has_rocm
 from monai.deploy.utils.fileutil import checksum
 from monai.deploy.utils.importutil import dist_module_path, dist_requires, get_application
 from monai.deploy.utils.spinner import ProgressSpinner
@@ -42,11 +43,10 @@ def verify_base_image(base_image: str) -> str:
         str: returns string identifier of the dockerfile template to build MAP
         if valid base image provided, returns empty string otherwise
     """
-    import torch
-    if "AMD" not in torch.cuda.get_device_name(0):
-        valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
-    else:
+    if has_rocm():
         valid_prefixes = {"rocm": "ubuntu", "rocm/pytorch": "pytorch"}
+    else:
+        valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
 
     for prefix, template in valid_prefixes.items():
         if prefix in base_image:
@@ -93,18 +93,17 @@ def initialize_args(args: Namespace) -> Dict:
     if args.base:
         dockerfile_type = verify_base_image(args.base)
         if not dockerfile_type:
-            import torch
-            if "AMD" not in torch.cuda.get_device_name(0):
+            if has_rocm():
                 logger.error(
                     "Provided base image '{}' is not supported \n \
-                            Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
+                            Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
                         args.base
                     )
                 )
             else:
                 logger.error(
                     "Provided base image '{}' is not supported \n \
-                            Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
+                            Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
                         args.base
                     )
                 )
diff --git a/monai/deploy/runner/runner.py b/monai/deploy/runner/runner.py
index 9a145e48..d521bf30 100644
--- a/monai/deploy/runner/runner.py
+++ b/monai/deploy/runner/runner.py
@@ -20,6 +20,7 @@
 from typing import Tuple
 
 from monai.deploy.runner.utils import get_requested_gpus, run_cmd, verify_image
+from monai.deploy.utils.deviceutil import has_rocm
 
 logger = logging.getLogger("app_runner")
 
@@ -87,9 +88,8 @@ def run_app(map_name: str, input_path: Path, output_path: Path, app_info: dict,
     # Use nvidia-docker if GPU resources are requested
     requested_gpus = get_requested_gpus(pkg_info)
     if requested_gpus > 0:
-        import torch
-        if "AMD" not in torch.cuda.get_device_name(0):
-           cmd = "nvidia-docker run --rm -a STDERR"
+        if not has_rocm():
+            cmd = "nvidia-docker run --rm -a STDERR"
 
     if not quiet:
         cmd += " -a STDOUT"
@@ -162,14 +162,13 @@ def pkg_specific_dependency_verification(pkg_info: dict) -> bool:
     """
     requested_gpus = get_requested_gpus(pkg_info)
     if requested_gpus > 0:
-        import torch
-        if "AMD" not in torch.cuda.get_device_name(0):
-        # check for nvidia-docker
-        prog = "nvidia-docker"
-        logger.info('--> Verifying if "%s" is installed...\n', prog)
-        if not shutil.which(prog):
-            logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
-            return False
+        if not has_rocm():
+            # check for nvidia-docker
+            prog = "nvidia-docker"
+            logger.info('--> Verifying if "%s" is installed...\n', prog)
+            if not shutil.which(prog):
+                logger.error('ERROR: "%s" not installed, please install nvidia-docker.', prog)
+                return False
 
     return True
 
diff --git a/monai/deploy/utils/deviceutil.py b/monai/deploy/utils/deviceutil.py
new file mode 100644
index 00000000..65645349
--- /dev/null
+++ b/monai/deploy/utils/deviceutil.py
@@ -0,0 +1,33 @@
+# Copyright 2023 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+
+
+def has_rocm():
+    """Return True if ROCm is installed and GPU device is detected.
+
+    Runs the rocminfo command and looks for a "Device Type" line mentioning "GPU".
+
+    Returns:
+        True if ROCm is installed and GPU device is detected, otherwise False.
+    """
+    cmd = "rocminfo"
+    process = subprocess.run([cmd], stdout=subprocess.PIPE)
+    for line_in in process.stdout.decode().splitlines():
+        if "Device Type" in line_in and "GPU" in line_in:
+            return True
+
+    return False
+
+
+if __name__ == "__main__":
+    print(has_rocm())

From 0e475f4d93a610e15c399b3777814f566c4be9ec Mon Sep 17 00:00:00 2001
From: Vicky Tsang <vtsang@amd.com>
Date: Fri, 24 Mar 2023 12:34:14 -0700
Subject: [PATCH 3/4] update docs/tutorials with AMD GPU/rocm references

Signed-off-by: Vicky Tsang <vtsang@amd.com>
---
 docs/source/getting_started/tutorials/mednist_app.md      | 8 ++++++++
 docs/source/getting_started/tutorials/monai_bundle_app.md | 4 ++++
 docs/source/getting_started/tutorials/multi_model_app.md  | 4 ++++
 docs/source/getting_started/tutorials/segmentation_app.md | 4 ++++
 4 files changed, 20 insertions(+)

diff --git a/docs/source/getting_started/tutorials/mednist_app.md b/docs/source/getting_started/tutorials/mednist_app.md
index 539d98fe..fa7e3605 100644
--- a/docs/source/getting_started/tutorials/mednist_app.md
+++ b/docs/source/getting_started/tutorials/mednist_app.md
@@ -88,6 +88,14 @@ monai-deploy package examples/apps/mednist_classifier_monaideploy/mednist_classi
     --model classifier.zip \
     -l DEBUG
 
+# For AMD GPUs, nvidia-docker is not required. Use the --base [base image] option to override the Docker base image.
+# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
+monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 \
+    examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py \
+    --tag mednist_app:latest \
+    --model classifier.zip \
+    -l DEBUG
+
 # Run the app with docker image and input file locally
 monai-deploy run mednist_app:latest input output
 cat output/output.json
diff --git a/docs/source/getting_started/tutorials/monai_bundle_app.md b/docs/source/getting_started/tutorials/monai_bundle_app.md
index bc47b1b5..986e1ff2 100644
--- a/docs/source/getting_started/tutorials/monai_bundle_app.md
+++ b/docs/source/getting_started/tutorials/monai_bundle_app.md
@@ -66,6 +66,10 @@ monai-deploy exec ../examples/apps/ai_spleen_seg_app/app.py -i dcm/ -o output -m
 # Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
 monai-deploy package examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
 
+# For AMD GPUs, nvidia-docker is not required. Use the --base [base image] option to override the Docker base image.
+# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
+monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
+
 # Run the app with docker image and input file locally
 monai-deploy run seg_app:latest dcm/ output
 ```
diff --git a/docs/source/getting_started/tutorials/multi_model_app.md b/docs/source/getting_started/tutorials/multi_model_app.md
index 44b2bcbd..307cb6f1 100644
--- a/docs/source/getting_started/tutorials/multi_model_app.md
+++ b/docs/source/getting_started/tutorials/multi_model_app.md
@@ -66,6 +66,10 @@ monai-deploy exec ../examples/apps/examples/apps/ai_multi_ai_app/app.py -i dcm/
 # Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
 monai-deploy package -b nvcr.io/nvidia/pytorch:22.08-py3 examples/apps/ai_multi_ai_app --tag multi_model_app:latest --model multi_models -l DEBUG
 
+# For AMD GPUs, nvidia-docker is not required. Use the --base [base image] option to override the Docker base image.
+# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
+monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_multi_ai_app --tag multi_model_app:latest --model multi_models -l DEBUG
+
 # Run the app with docker image and input file locally
 monai-deploy run multi_model_app:latest dcm/ output
 ```
diff --git a/docs/source/getting_started/tutorials/segmentation_app.md b/docs/source/getting_started/tutorials/segmentation_app.md
index 2905729d..9ee72515 100644
--- a/docs/source/getting_started/tutorials/segmentation_app.md
+++ b/docs/source/getting_started/tutorials/segmentation_app.md
@@ -76,6 +76,10 @@ python examples/apps/ai_spleen_seg_app/app.py -i dcm/ -o output -m model.ts
 # Please see https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker to install nvidia-docker2.
 monai-deploy package examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
 
+# For AMD GPUs, nvidia-docker is not required. Use the --base [base image] option to override the Docker base image.
+# Please see https://hub.docker.com/r/rocm/pytorch for rocm/pytorch docker images.
+monai-deploy package -b rocm/pytorch:rocm5.4.1_ubuntu20.04_py3.7_pytorch_1.12.1 examples/apps/ai_spleen_seg_app --tag seg_app:latest --model model.ts -l DEBUG
+
 # Run the app with docker image and input file locally
 monai-deploy run seg_app:latest dcm/ output
 ```

From 4989e6e7b6c9d6d6d22b8200a45a074a14729c1d Mon Sep 17 00:00:00 2001
From: Vicky Tsang <vtsang@amd.com>
Date: Tue, 28 Mar 2023 19:03:18 -0700
Subject: [PATCH 4/4] remove rocm dependency in packager

Signed-off-by: Vicky Tsang <vtsang@amd.com>
---
 monai/deploy/packager/util.py    | 24 ++++++++----------------
 monai/deploy/utils/deviceutil.py | 11 +++++++----
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/monai/deploy/packager/util.py b/monai/deploy/packager/util.py
index 6506895e..2249775b 100644
--- a/monai/deploy/packager/util.py
+++ b/monai/deploy/packager/util.py
@@ -23,7 +23,6 @@
 from monai.deploy.exceptions import WrongValueError
 from monai.deploy.packager.constants import DefaultValues
 from monai.deploy.packager.templates import Template
-from monai.deploy.utils.deviceutil import has_rocm
 from monai.deploy.utils.fileutil import checksum
 from monai.deploy.utils.importutil import dist_module_path, dist_requires, get_application
 from monai.deploy.utils.spinner import ProgressSpinner
@@ -43,8 +42,8 @@ def verify_base_image(base_image: str) -> str:
         str: returns string identifier of the dockerfile template to build MAP
         if valid base image provided, returns empty string otherwise
     """
-    if has_rocm():
-        valid_prefixes = {"rocm": "ubuntu", "rocm/pytorch": "pytorch"}
+    if "rocm" in base_image:
+        valid_prefixes = {"rocm/pytorch": "ubuntu"}
     else:
         valid_prefixes = {"nvcr.io/nvidia/cuda": "ubuntu", "nvcr.io/nvidia/pytorch": "pytorch"}
 
@@ -93,20 +92,13 @@ def initialize_args(args: Namespace) -> Dict:
     if args.base:
         dockerfile_type = verify_base_image(args.base)
         if not dockerfile_type:
-            if has_rocm():
-                logger.error(
-                    "Provided base image '{}' is not supported \n \
-                            Please provide a ROCm or Pytorch image from https://hub.docker.com/r/rocm/pytorch".format(
-                        args.base
-                    )
-                )
-            else:
-                logger.error(
-                    "Provided base image '{}' is not supported \n \
-                            Please provide a Cuda or Pytorch image from https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
-                        args.base
-                    )
+            logger.error(
+                "Provided base image '{}' is not supported \n \
+                        Please provide a ROCm or Cuda based Pytorch image from \n \
+                        https://hub.docker.com/r/rocm/pytorch or https://ngc.nvidia.com/ (nvcr.io/nvidia)".format(
+                    args.base
                 )
+            )
 
             sys.exit(1)
 
diff --git a/monai/deploy/utils/deviceutil.py b/monai/deploy/utils/deviceutil.py
index 65645349..6a62e33b 100644
--- a/monai/deploy/utils/deviceutil.py
+++ b/monai/deploy/utils/deviceutil.py
@@ -21,10 +21,13 @@ def has_rocm():
         True if ROCm is installed and GPU device is detected, otherwise False.
     """
     cmd = "rocminfo"
-    process = subprocess.run([cmd], stdout=subprocess.PIPE)
-    for line_in in process.stdout.decode().splitlines():
-        if "Device Type" in line_in and "GPU" in line_in:
-            return True
+    try:
+        process = subprocess.run([cmd], stdout=subprocess.PIPE)
+        for line_in in process.stdout.decode().splitlines():
+            if "Device Type" in line_in and "GPU" in line_in:
+                return True
+    except Exception:
+        pass
 
     return False