Skip to content

Commit 4df81a1

Browse files
committed
accelerator/rocm: add large BAR check
check whether the device has large BAR support enabled. If not, set the rocm_copy_D2H and H2D thresholds to 0, i.e. use hipMemcpy(Async) for all data transfers. Data center (Instinct) devices usually have large BAR enabled, for gaming PCs its not always set by the user. The PR also does a little bit of cleanup in the error handling of the lazy_init() routine. Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent 9ba5034 commit 4df81a1

File tree

1 file changed

+71
-17
lines changed

1 file changed

+71
-17
lines changed

opal/mca/accelerator/rocm/accelerator_rocm_component.c

Lines changed: 71 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ static int accelerator_rocm_component_register(void)
181181

182182
int opal_accelerator_rocm_lazy_init()
183183
{
184+
hipError_t hip_err;
184185
int err = OPAL_SUCCESS;
185186

186187
/* Double checked locking to avoid having to
@@ -196,41 +197,94 @@ int opal_accelerator_rocm_lazy_init()
196197
goto out;
197198
}
198199

199-
err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
200-
if (hipSuccess != err) {
200+
hip_err = hipGetDeviceCount(&opal_accelerator_rocm_num_devices);
201+
if (hipSuccess != hip_err) {
201202
opal_output(0, "Failed to query device count, err=%d %s\n",
202-
err, hipGetErrorString(err));
203-
err = OPAL_ERROR;
203+
hip_err, hipGetErrorString(hip_err));
204+
err = OPAL_ERROR;
204205
goto out;
205206
}
206207

207208
hipStream_t memcpy_stream;
208-
err = hipStreamCreate(&memcpy_stream);
209-
if (hipSuccess != err) {
209+
hip_err = hipStreamCreate(&memcpy_stream);
210+
if (hipSuccess != hip_err) {
210211
opal_output(0, "Could not create hipStream, err=%d %s\n",
211-
err, hipGetErrorString(err));
212-
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
212+
hip_err, hipGetErrorString(hip_err));
213+
err = OPAL_ERROR; // we got hipErrorInvalidValue, pretty bad
213214
goto out;
214215
}
216+
215217
opal_accelerator_rocm_MemcpyStream = malloc(sizeof(hipStream_t));
218+
if (NULL == opal_accelerator_rocm_MemcpyStream) {
219+
opal_output(0, "Could not allocate hipStream\n");
220+
err = OPAL_ERR_OUT_OF_RESOURCE;
221+
goto out;
222+
}
216223
*opal_accelerator_rocm_MemcpyStream = memcpy_stream;
217224

218225
opal_accelerator_rocm_mem_bw = malloc(sizeof(float)*opal_accelerator_rocm_num_devices);
226+
if (NULL == opal_accelerator_rocm_mem_bw) {
227+
opal_output(0, "Could not allocate memory_bw array\n");
228+
err = OPAL_ERR_OUT_OF_RESOURCE;
229+
goto out;
230+
}
231+
219232
for (int i = 0; i < opal_accelerator_rocm_num_devices; ++i) {
220233
int mem_clock_rate; // kHz
221-
err = hipDeviceGetAttribute(&mem_clock_rate,
222-
hipDeviceAttributeMemoryClockRate,
223-
i);
234+
hip_err = hipDeviceGetAttribute(&mem_clock_rate,
235+
hipDeviceAttributeMemoryClockRate,
236+
i);
237+
if (hipSuccess != hip_err) {
238+
opal_output(0, "Failed to query device MemoryClockRate, err=%d %s\n",
239+
hip_err, hipGetErrorString(hip_err));
240+
err = OPAL_ERROR;
241+
goto out;
242+
}
243+
224244
int bus_width; // bit
225-
err = hipDeviceGetAttribute(&bus_width,
226-
hipDeviceAttributeMemoryBusWidth,
227-
i);
228-
/* bw = clock_rate * bus width * 2bit multiplier
229-
* See https://forums.developer.nvidia.com/t/memory-clock-rate/107940
230-
*/
245+
hip_err = hipDeviceGetAttribute(&bus_width,
246+
hipDeviceAttributeMemoryBusWidth,
247+
i);
248+
if (hipSuccess != hip_err) {
249+
opal_output(0, "Failed to query device MemoryBusWidth, err=%d %s\n",
250+
hip_err, hipGetErrorString(hip_err));
251+
err = OPAL_ERROR;
252+
goto out;
253+
}
254+
255+
/* bw = clock_rate * bus width * 2bit multiplier */
231256
float bw = ((float)mem_clock_rate*(float)bus_width*2.0) / 1024 / 1024 / 8;
232257
opal_accelerator_rocm_mem_bw[i] = bw;
233258
}
259+
260+
#if HIP_VERSION >= 60000000
261+
int dev_id;
262+
hip_err = hipGetDevice(&dev_id);
263+
if (hipSuccess != hip_err) {
264+
opal_output(0, "error retrieving current device");
265+
err = OPAL_ERROR;
266+
goto out;
267+
}
268+
269+
int has_large_bar = 0;
270+
hip_err = hipDeviceGetAttribute (&has_large_bar, hipDeviceAttributeIsLargeBar,
271+
dev_id);
272+
if (hipSuccess != hip_err) {
273+
opal_output(0, "error retrieving current device");
274+
err = OPAL_ERROR;
275+
goto out;
276+
}
277+
278+
if (0 == has_large_bar) {
279+
// Without large BAR we have to use hipMemcpy(Async) for all data transfers
280+
opal_output(0, "Large BAR support is not enabled on current device. "
281+
"Enable large BAR support in BIOS (Above 4G Encoding) for "
282+
"better performance\n.");
283+
opal_accelerator_rocm_memcpyH2D_limit = 0;
284+
opal_accelerator_rocm_memcpyD2H_limit = 0;
285+
}
286+
#endif
287+
234288
err = OPAL_SUCCESS;
235289
opal_atomic_wmb();
236290
accelerator_rocm_init_complete = true;

0 commit comments

Comments
 (0)