Skip to content

Commit e426810

Browse files
authored
Merge pull request #188 from leofang/cuda_core_11
Support JIT compilation for CUDA driver & bindings 11.x
2 parents 5066c9f + 74de685 commit e426810

File tree

2 files changed

+88
-23
lines changed

2 files changed

+88
-23
lines changed

cuda_core/cuda/core/experimental/_launcher.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
from dataclasses import dataclass
6+
import importlib.metadata
67
from typing import Optional, Union
78

89
import numpy as np
@@ -15,10 +16,30 @@
1516
from cuda.core.experimental._utils import CUDAError, check_or_create_options, handle_return
1617

1718

# TODO: revisit this treatment for py313t builds
_inited = False
_use_ex = None


def _lazy_init():
    """Decide once, on first use, whether the "Ex" launch API is usable.

    Sets the module-global ``_use_ex`` flag: True only when both the
    installed driver (>= 11080, i.e. CUDA 11.8) and the cuda-python
    binding (>= 11.8) expose ``cuLaunchKernelEx``.
    """
    global _inited, _use_ex
    if _inited:
        return

    # binding availability depends on cuda-python version
    binding_ver = tuple(
        int(part)
        for part in importlib.metadata.version("cuda-python").split(".")[:2]
    )
    driver_ver = handle_return(cuda.cuDriverGetVersion())
    _use_ex = driver_ver >= 11080 and binding_ver >= (11, 8)
    _inited = True
36+
37+
1838
@dataclass
1939
class LaunchConfig:
2040
"""
2141
"""
42+
# TODO: expand LaunchConfig to include other attributes
2243
grid: Union[tuple, int] = None
2344
block: Union[tuple, int] = None
2445
stream: Stream = None
@@ -39,6 +60,8 @@ def __post_init__(self):
3960
if self.shmem_size is None:
4061
self.shmem_size = 0
4162

63+
_lazy_init()
64+
4265
def _cast_to_3_tuple(self, cfg):
4366
if isinstance(cfg, int):
4467
if cfg < 1:
@@ -67,24 +90,34 @@ def launch(kernel, config, *kernel_args):
6790
if not isinstance(kernel, Kernel):
6891
raise ValueError
6992
config = check_or_create_options(LaunchConfig, config, "launch config")
93+
if config.stream is None:
94+
raise CUDAError("stream cannot be None")
95+
7096
# TODO: can we ensure kernel_args is valid/safe to use here?
97+
# TODO: merge with HelperKernelParams?
98+
kernel_args = ParamHolder(kernel_args)
99+
args_ptr = kernel_args.ptr
71100

72-
driver_ver = handle_return(cuda.cuDriverGetVersion())
73-
if driver_ver >= 12000:
101+
# Note: CUkernel can still be launched via the old cuLaunchKernel and we do not care
102+
# about the CUfunction/CUkernel difference (which depends on whether the "old" or
103+
# "new" module loading APIs are in use). We check both binding & driver versions here
104+
# mainly to see if the "Ex" API is available and if so we use it, as it's more feature
105+
# rich.
106+
if _use_ex:
74107
drv_cfg = cuda.CUlaunchConfig()
75108
drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
76109
drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
77-
if config.stream is None:
78-
raise CUDAError("stream cannot be None")
79110
drv_cfg.hStream = config.stream._handle
80111
drv_cfg.sharedMemBytes = config.shmem_size
81-
drv_cfg.numAttrs = 0 # FIXME
82-
83-
# TODO: merge with HelperKernelParams?
84-
kernel_args = ParamHolder(kernel_args)
85-
args_ptr = kernel_args.ptr
86-
112+
drv_cfg.numAttrs = 0 # TODO
87113
handle_return(cuda.cuLaunchKernelEx(
88114
drv_cfg, int(kernel._handle), args_ptr, 0))
89115
else:
90-
raise NotImplementedError("TODO")
116+
# TODO: check if config has any unsupported attrs
117+
handle_return(cuda.cuLaunchKernel(
118+
int(kernel._handle),
119+
*config.grid,
120+
*config.block,
121+
config.shmem_size,
122+
config.stream._handle,
123+
args_ptr, 0))

cuda_core/cuda/core/experimental/_module.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,13 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

5+
import importlib.metadata
6+
57
from cuda import cuda, cudart
68
from cuda.core.experimental._utils import handle_return
79

810

911
_backend = {
10-
"new": {
11-
"file": cuda.cuLibraryLoadFromFile,
12-
"data": cuda.cuLibraryLoadData,
13-
"kernel": cuda.cuLibraryGetKernel,
14-
},
1512
"old": {
1613
"file": cuda.cuModuleLoad,
1714
"data": cuda.cuModuleLoadDataEx,
@@ -20,6 +17,34 @@
2017
}
2118

2219

# TODO: revisit this treatment for py313t builds
_inited = False
_py_major_ver = None
_driver_ver = None
_kernel_ctypes = None


def _lazy_init():
    """Populate version-dependent module globals exactly once.

    Records the cuda-python major version and the driver version,
    registers the library-based ("new") loader backend when the binding
    is 12.x or newer, and fixes the tuple of driver ctypes that a kernel
    handle may be an instance of.
    """
    global _inited
    if _inited:
        return

    global _py_major_ver, _driver_ver, _kernel_ctypes
    # binding availability depends on cuda-python version
    _py_major_ver = int(importlib.metadata.version("cuda-python").partition(".")[0])
    if _py_major_ver < 12:
        # cuLibrary* APIs are not exposed by 11.x bindings
        _kernel_ctypes = (cuda.CUfunction,)
    else:
        _backend["new"] = {
            "file": cuda.cuLibraryLoadFromFile,
            "data": cuda.cuLibraryLoadData,
            "kernel": cuda.cuLibraryGetKernel,
        }
        _kernel_ctypes = (cuda.CUfunction, cuda.CUkernel)
    _driver_ver = handle_return(cuda.cuDriverGetVersion())
    _inited = True
46+
47+
2348
class Kernel:
2449

2550
__slots__ = ("_handle", "_module",)
@@ -29,13 +54,15 @@ def __init__(self):
2954

3055
@staticmethod
def _from_obj(obj, mod):
    """Wrap a driver kernel handle *obj*, owned by module *mod*, in a Kernel.

    Bypasses ``__init__`` deliberately: the handle already exists and is
    merely being adopted.
    """
    assert isinstance(obj, _kernel_ctypes)
    assert isinstance(mod, ObjectCode)
    kernel = Kernel.__new__(Kernel)
    kernel._handle = obj
    kernel._module = mod
    return kernel
3863

64+
# TODO: implement from_handle()
65+
3966

4067
class ObjectCode:
4168

@@ -46,26 +73,29 @@ def __init__(self, module, code_type, jit_options=None, *,
4673
symbol_mapping=None):
4774
if code_type not in self._supported_code_type:
4875
raise ValueError
76+
_lazy_init()
4977
self._handle = None
5078

51-
driver_ver = handle_return(cuda.cuDriverGetVersion())
52-
self._loader = _backend["new"] if driver_ver >= 12000 else _backend["old"]
79+
backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
80+
self._loader = _backend[backend]
5381

5482
if isinstance(module, str):
55-
if driver_ver < 12000 and jit_options is not None:
83+
# TODO: this option is only taken by the new library APIs, but we have
84+
# a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
85+
if jit_options is not None:
5686
raise ValueError
5787
module = module.encode()
5888
self._handle = handle_return(self._loader["file"](module))
5989
else:
6090
assert isinstance(module, bytes)
6191
if jit_options is None:
6292
jit_options = {}
63-
if driver_ver >= 12000:
93+
if backend == "new":
6494
args = (module, list(jit_options.keys()), list(jit_options.values()), len(jit_options),
6595
# TODO: support library options
6696
[], [], 0)
67-
else:
68-
args = (module, len(jit_options), jit_options.keys(), jit_options.values())
97+
else: # "old" backend
98+
args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values()))
6999
self._handle = handle_return(self._loader["data"](*args))
70100

71101
self._code_type = code_type
@@ -83,3 +113,5 @@ def get_kernel(self, name):
83113
name = name.encode()
84114
data = handle_return(self._loader["kernel"](self._handle, name))
85115
return Kernel._from_obj(data, self)
116+
117+
# TODO: implement from_handle()

0 commit comments

Comments
 (0)