diff --git a/intermediate_source/scaled_dot_product_attention_tutorial.py b/intermediate_source/scaled_dot_product_attention_tutorial.py
index ac1144159cd..666d240ece1 100644
--- a/intermediate_source/scaled_dot_product_attention_tutorial.py
+++ b/intermediate_source/scaled_dot_product_attention_tutorial.py
@@ -114,7 +114,7 @@ def benchmark_torch_function_in_microseconds(f, *args, **kwargs):
 #
 # Depending on what machine you ran the above cell on and what hardware is
 # available, your results might be different.
-# - If you don’t have a GPU and are running on CPU then the context manager
+# - If you don’t have a GPU and are running on CPU, then with FP32 the context manager
 #   will have no effect and all three runs should return similar timings.
 # - Depending on what compute capability your graphics card supports
 #   flash attention or memory efficient might have failed.
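
For context, below is a minimal, self-contained sketch of the behavior the amended comment describes. It is not part of the patch: the helper reuses the name and signature of benchmark_torch_function_in_microseconds from the hunk header (its body here is a plausible reimplementation, not a verbatim copy), the context manager is the torch.backends.cuda.sdp_kernel API from the PyTorch 2.0-2.2 era that this version of the tutorial targets, and the tensor shapes are arbitrary. On a CPU-only machine with FP32 inputs, all three timings should come out roughly equal, since sdp_kernel only toggles the CUDA backends.

# Illustrative sketch only -- not part of this patch. Assumes the
# torch.backends.cuda.sdp_kernel context manager (since deprecated in
# favor of torch.nn.attention.sdpa_kernel); tensor shapes are arbitrary.
import torch
import torch.nn.functional as F
import torch.utils.benchmark as benchmark
from torch.backends.cuda import sdp_kernel


def benchmark_torch_function_in_microseconds(f, *args, **kwargs):
    # Same name/signature as the helper in the hunk header above; this body
    # is a plausible reimplementation using torch.utils.benchmark.
    t0 = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"args": args, "kwargs": kwargs, "f": f},
    )
    return t0.blocked_autorange().mean * 1e6


device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float32  # FP32: the case the updated comment calls out

query = torch.rand(8, 8, 128, 64, device=device, dtype=dtype)
key = torch.rand(8, 8, 128, 64, device=device, dtype=dtype)
value = torch.rand(8, 8, 128, 64, device=device, dtype=dtype)

# One flag set per backend. sdp_kernel only affects the CUDA dispatch, so on
# CPU all three runs fall through to the same kernel and produce similar
# timings, which is exactly what the updated comment states.
backends = {
    "math": dict(enable_math=True, enable_flash=False, enable_mem_efficient=False),
    "flash attention": dict(enable_math=False, enable_flash=True, enable_mem_efficient=False),
    "memory efficient": dict(enable_math=False, enable_flash=False, enable_mem_efficient=True),
}

for name, flags in backends.items():
    with sdp_kernel(**flags):
        try:
            us = benchmark_torch_function_in_microseconds(
                F.scaled_dot_product_attention, query, key, value
            )
            print(f"{name}: {us:.1f} microseconds")
        except RuntimeError:
            # On GPU, a backend can be unavailable for the card's compute
            # capability or the input dtype -- the diff's second bullet.
            print(f"{name} is not supported on this hardware.")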