Skip to content

Commit 69e98ea

Browse files
author
Vincent Moens
committed
amend
1 parent 0d6cba7 commit 69e98ea

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

intermediate_source/pinmem_nonblock.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,30 +221,30 @@ def benchmark_with_profiler(
221221
# .. figure:: /_static/img/pinmem/trace_streamed0_pinned0.png
222222
# :alt:
223223
#
224+
# Using a pinned tensor doesn't change the trace much; both operations are still executed consecutively:
224225

225226
benchmark_with_profiler(streamed=True, pinned=False)
226227

227228
######################################################################
228-
# Using a pinned tensor doesn't change the trace much; both operations are still executed consecutively:
229229
#
230230
# .. figure:: /_static/img/pinmem/trace_streamed0_pinned1.png
231231
# :alt:
232232
#
233+
# Sending a pageable tensor to GPU on a separate stream is also a blocking operation:
233234

234235
benchmark_with_profiler(streamed=False, pinned=True)
235236

236237
######################################################################
237-
# Sending a pageable tensor to GPU on a separate stream is also a blocking operation:
238238
#
239239
# .. figure:: /_static/img/pinmem/trace_streamed1_pinned0.png
240240
# :alt:
241241
#
242+
# Only pinned-tensor copies to GPU on a separate stream overlap with another CUDA kernel executed on
243+
# the main stream:
242244

243245
benchmark_with_profiler(streamed=True, pinned=True)
244246

245247
######################################################################
246-
# Only pinned-tensor copies to GPU on a separate stream overlap with another CUDA kernel executed on
247-
# the main stream:
248248
#
249249
# .. figure:: /_static/img/pinmem/trace_streamed1_pinned1.png
250250
# :alt:

0 commit comments

Comments
 (0)