From 554232401fffd8f4acb4c9a2ab4288843ed12da3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 9 Dec 2024 10:48:10 -0600 Subject: [PATCH 1/4] Fix lapse in logic of need_copy determination Closes gh-1926 One-dimensional contiguous arrays are both C- and F- contiguous. The check was written with the assumption that f_contig implies not c_contig, which is untrue for 1D arrays. The reproducer provided in gh-1926 now outputs ``` astype time: 9.441375732421875e-05 a is a_ True asarray time: 6.103515625e-05 a is a__ True ``` Previously, astype took several orders of magnitude longer due to the copy operation performed. --- dpctl/tensor/_copy_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 9dd53eb383..3bf1ba5c56 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -672,9 +672,9 @@ def astype( f_contig = usm_ary.flags.f_contiguous needs_copy = copy or not ary_dtype == target_dtype if not needs_copy and (order != "K"): - needs_copy = (c_contig and order not in ["A", "C"]) or ( - f_contig and order not in ["A", "F"] - ) + needs_copy = ( + c_contig and not f_contig and order not in ["A", "C"] + ) or (not c_contig and f_contig and order not in ["A", "F"]) if not needs_copy: return usm_ary copy_order = "C" From 8bdf20621ce9d1578a9be6e818e7cbc7f14a23ea Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 9 Dec 2024 11:10:32 -0600 Subject: [PATCH 2/4] Add test based on reproducer in gh-1926 --- dpctl/tests/test_usm_ndarray_ctor.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 7b31df28ea..b4d0f7bc8b 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1435,6 +1435,17 @@ def test_astype_device(): assert r.sycl_queue == q2 +def test_astype_gh_1926(): + get_queue_or_skip() + + x = dpt.ones(10_000) + x_ = dpt.astype(x, 
x.dtype, copy=False, order="C") + assert x is x_ + + x__ = dpt.astype(x, x.dtype, copy=False, order="F") + assert x is x__ + + def test_copy(): try: X = dpt.usm_ndarray((5, 5), "i4")[2:4, 1:4] From 4b98e947894294f4b8b3b13a2e2ca415d2932d41 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 9 Dec 2024 12:08:54 -0600 Subject: [PATCH 3/4] Add comment to the line changed in this PR --- dpctl/tensor/_copy_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index 3bf1ba5c56..0d23407fcf 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -672,6 +672,10 @@ def astype( f_contig = usm_ary.flags.f_contiguous needs_copy = copy or not ary_dtype == target_dtype if not needs_copy and (order != "K"): + # ensure that order="F" for C-contig input triggers copy, + # and order="C" for F-contig input triggers copy too. + # 1D arrays which are both C- and F- contig should not + # force copying for either order="F" or order="C", see gh-1926 needs_copy = ( c_contig and not f_contig and order not in ["A", "C"] ) or (not c_contig and f_contig and order not in ["A", "F"]) From 99a1208a4cd126497848cc9416192b0b4b97479a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 9 Dec 2024 12:09:52 -0600 Subject: [PATCH 4/4] Reduce the size of array in the test --- dpctl/tests/test_usm_ndarray_ctor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index b4d0f7bc8b..90023a2392 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1438,7 +1438,7 @@ def test_astype_device(): def test_astype_gh_1926(): get_queue_or_skip() - x = dpt.ones(10_000) + x = dpt.ones(64) x_ = dpt.astype(x, x.dtype, copy=False, order="C") assert x is x_