Skip to content

Commit 95a8c39

Browse files
authored
Merge pull request #12982 from edgargabriel/topic/rocm-large-bar-check
accelerator/rocm: add large BAR check
2 parents 28c2e47 + 4df81a1 commit 95a8c39

File tree

1 file changed

+71
-17
lines changed

1 file changed

+71
-17
lines changed

opal/mca/accelerator/rocm/accelerator_rocm_component.c

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)
181181

182182
int opal_accelerator_rocm_lazy_init()
183183
{
184+
hipError_t hip_err;
184185
int err = OPAL_SUCCESS;
185186

186187
/* Double checked locking to avoid having to
@@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
196197
goto out;
197198
}
198199

199-
err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
200-
if (hipSuccess != err) {
200+
hip_err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
201+
if (hipSuccess != hip_err) {
201202
opal_output(0, "Failed to query device count, err=%d %s\n",
202-
err, hipGetErrorString(err));
203-
err = OPAL_ERROR;
203+
hip_err, hipGetErrorString(hip_err));
204+
err = OPAL_ERROR;
204205
goto out;
205206
}
206207

207208
hipStream_t memcpy_stream;
208-
err = hipStreamCreate(&memcpy_stream);
209-
if (hipSuccess != err) {
209+
hip_err = hipStreamCreate(&memcpy_stream);
210+
if (hipSuccess != hip_err) {
210211
opal_output(0, "Could not create hipStream, err=%d %s\n",
211-
err, hipGetErrorString(err));
212-
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
212+
hip_err, hipGetErrorString(hip_err));
213+
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
213214
goto out;
214215
}
216+
215217
opal_accelerator_rocm_MemcpyStream = malloc(sizeof(hipStream_t));
218+
if (NULL == opal_accelerator_rocm_MemcpyStream) {
219+
opal_output(0, "Could not allocate hipStream\n");
220+
err = OPAL_ERR_OUT_OF_RESOURCE;
221+
goto out;
222+
}
216223
*opal_accelerator_rocm_MemcpyStream = memcpy_stream;
217224

218225
opal_accelerator_rocm_mem_bw = malloc(sizeof(float)*opal_accelerator_rocm_num_devices);
226+
if (NULL == opal_accelerator_rocm_mem_bw) {
227+
opal_output(0, "Could not allocate memory_bw array\n");
228+
err = OPAL_ERR_OUT_OF_RESOURCE;
229+
goto out;
230+
}
231+
219232
for (int i = 0; i < opal_accelerator_rocm_num_devices; ++i) {
220233
int mem_clock_rate; // kHz
221-
err = hipDeviceGetAttribute(&mem_clock_rate,
222-
hipDeviceAttributeMemoryClockRate,
223-
i);
234+
hip_err = hipDeviceGetAttribute(&mem_clock_rate,
235+
hipDeviceAttributeMemoryClockRate,
236+
i);
237+
if (hipSuccess != hip_err) {
238+
opal_output(0, "Failed to query device MemoryClockRate, err=%d %s\n",
239+
hip_err, hipGetErrorString(hip_err));
240+
err = OPAL_ERROR;
241+
goto out;
242+
}
243+
224244
int bus_width; // bit
225-
err = hipDeviceGetAttribute(&bus_width,
226-
hipDeviceAttributeMemoryBusWidth,
227-
i);
228-
/* bw = clock_rate * bus width * 2bit multiplier
229-
* See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
230-
*/
245+
hip_err = hipDeviceGetAttribute(&bus_width,
246+
hipDeviceAttributeMemoryBusWidth,
247+
i);
248+
if (hipSuccess != hip_err) {
249+
opal_output(0, "Failed to query device MemoryBusWidth, err=%d %s\n",
250+
hip_err, hipGetErrorString(hip_err));
251+
err = OPAL_ERROR;
252+
goto out;
253+
}
254+
255+
/* bw = clock_rate * bus width * 2bit multiplier */
231256
float bw = ((float)mem_clock_rate*(float)bus_width*2.0) / 1024 / 1024 / 8;
232257
opal_accelerator_rocm_mem_bw[i] = bw;
233258
}
259+
260+
#if HIP_VERSION >= 60000000
261+
int dev_id;
262+
hip_err = hipGetDevice(&dev_id);
263+
if (hipSuccess != hip_err) {
264+
opal_output(0, "error retrieving current device");
265+
err = OPAL_ERROR;
266+
goto out;
267+
}
268+
269+
int has_large_bar = 0;
270+
hip_err = hipDeviceGetAttribute (&has_large_bar, hipDeviceAttributeIsLargeBar,
271+
dev_id);
272+
if (hipSuccess != hip_err) {
273+
opal_output(0, "error retrieving current device");
274+
err = OPAL_ERROR;
275+
goto out;
276+
}
277+
278+
if (0 == has_large_bar) {
279+
// Without large BAR we have to use hipMemcpy(Async) for all data transfers
280+
opal_output(0, "Large BAR support is not enabled on current device. "
281+
"Enable large BAR support in BIOS (Above 4G Encoding) for "
282+
"better performance\n.");
283+
opal_accelerator_rocm_memcpyH2D_limit = 0;
284+
opal_accelerator_rocm_memcpyD2H_limit = 0;
285+
}
286+
#endif
287+
234288
err = OPAL_SUCCESS;
235289
opal_atomic_wmb();
236290
accelerator_rocm_init_complete = true;

0 commit comments

Comments
 (0)