From 72e38bcfa02203d7ec40d3c13d6589e522770169 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 2 Apr 2024 09:31:40 -0700 Subject: [PATCH 1/2] Replaces uses of `std::abs`, `std::ceil`, `std::copysign`, and `std::floor` with `sycl` variants This is necessitated by issues linking when building for CUDA using 2024.1 compiler --- .../kernels/elementwise_functions/abs.hpp | 49 +---------- .../kernels/elementwise_functions/acos.hpp | 6 +- .../kernels/elementwise_functions/acosh.hpp | 10 +-- .../kernels/elementwise_functions/asin.hpp | 6 +- .../kernels/elementwise_functions/asinh.hpp | 6 +- .../kernels/elementwise_functions/atan.hpp | 10 +-- .../kernels/elementwise_functions/atan2.hpp | 2 +- .../kernels/elementwise_functions/atanh.hpp | 11 +-- .../elementwise_functions/cabs_impl.hpp | 84 +++++++++++++++++++ .../kernels/elementwise_functions/ceil.hpp | 2 +- .../kernels/elementwise_functions/cos.hpp | 6 +- .../kernels/elementwise_functions/cosh.hpp | 4 +- .../kernels/elementwise_functions/expm1.hpp | 8 +- .../kernels/elementwise_functions/floor.hpp | 2 +- .../elementwise_functions/floor_divide.hpp | 8 +- .../kernels/elementwise_functions/log1p.hpp | 2 +- .../elementwise_functions/logaddexp.hpp | 2 +- .../kernels/elementwise_functions/proj.hpp | 2 +- .../elementwise_functions/remainder.hpp | 4 +- .../kernels/elementwise_functions/sign.hpp | 8 +- .../kernels/elementwise_functions/sin.hpp | 6 +- .../kernels/elementwise_functions/sinh.hpp | 4 +- .../kernels/elementwise_functions/sqrt.hpp | 21 +++-- .../kernels/elementwise_functions/tan.hpp | 4 +- .../kernels/elementwise_functions/tanh.hpp | 4 +- 25 files changed, 157 insertions(+), 114 deletions(-) create mode 100644 dpctl/tensor/libtensor/include/kernels/elementwise_functions/cabs_impl.hpp diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp index b27d727e70..0eafef8fe3 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp @@ -31,8 +31,8 @@ #include #include +#include "cabs_impl.hpp" #include "kernels/elementwise_functions/common.hpp" -#include "sycl_complex.hpp" #include "kernels/dpctl_tensor_types.hpp" #include "utils/offset_utils.hpp" @@ -73,7 +73,7 @@ template struct AbsFunctor } else { if constexpr (is_complex::value) { - return cabs(x); + return detail::cabs(x); } else if constexpr (std::is_same_v || std::is_floating_point_v) @@ -81,51 +81,10 @@ template struct AbsFunctor return (std::signbit(x) ? -x : x); } else { - return std::abs(x); + return sycl::abs(x); } } } - -private: - template realT cabs(std::complex const &z) const - { - // Special values for cabs( x + y * 1j): - // * If x is either +infinity or -infinity and y is any value - // (including NaN), the result is +infinity. - // * If x is any value (including NaN) and y is either +infinity or - // -infinity, the result is +infinity. - // * If x is either +0 or -0, the result is equal to abs(y). - // * If y is either +0 or -0, the result is equal to abs(x). - // * If x is NaN and y is a finite number, the result is NaN. - // * If x is a finite number and y is NaN, the result is NaN. - // * If x is NaN and y is NaN, the result is NaN. - - const realT x = std::real(z); - const realT y = std::imag(z); - - constexpr realT q_nan = std::numeric_limits::quiet_NaN(); - constexpr realT p_inf = std::numeric_limits::infinity(); - - if (std::isinf(x)) { - return p_inf; - } - else if (std::isinf(y)) { - return p_inf; - } - else if (std::isnan(x)) { - return q_nan; - } - else if (std::isnan(y)) { - return q_nan; - } - else { -#ifdef USE_SYCL_FOR_COMPLEX_TYPES - return exprm_ns::abs(exprm_ns::complex(z)); -#else - return std::hypot(std::real(z), std::imag(z)); -#endif - } - } }; template struct AbsContigFactory template struct AbsTypeMapFactory { - /*! 
@brief get typeid for output type of std::abs(T x) */ + /*! @brief get typeid for output type of abs(T x) */ std::enable_if_t::value, int> get() { using rT = typename AbsOutputType::value_type; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp index 14303b7add..87a0d3b92a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acos.hpp @@ -102,7 +102,7 @@ template struct AcosFunctor */ constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); - if (std::abs(x) > r_eps || std::abs(y) > r_eps) { + if (sycl::fabs(x) > r_eps || sycl::fabs(y) > r_eps) { #ifdef USE_SYCL_FOR_COMPLEX_TYPES using sycl_complexT = exprm_ns::complex; sycl_complexT log_in = @@ -110,7 +110,7 @@ template struct AcosFunctor const realT wx = log_in.real(); const realT wy = log_in.imag(); - const realT rx = std::abs(wy); + const realT rx = sycl::fabs(wy); realT ry = wx + std::log(realT(2)); return resT{rx, (std::signbit(y)) ? ry : -ry}; @@ -118,7 +118,7 @@ template struct AcosFunctor resT log_in = std::log(in); const realT wx = std::real(log_in); const realT wy = std::imag(log_in); - const realT rx = std::abs(wy); + const realT rx = sycl::fabs(wy); realT ry = wx + std::log(realT(2)); return resT{rx, (std::signbit(y)) ? 
ry : -ry}; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp index 7b57fb9531..1dfde6a523 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/acosh.hpp @@ -109,7 +109,7 @@ template struct AcoshFunctor /* * For large x or y including acos(+-Inf + I*+-Inf) */ - if (std::abs(x) > r_eps || std::abs(y) > r_eps) { + if (sycl::fabs(x) > r_eps || sycl::fabs(y) > r_eps) { #ifdef USE_SYCL_FOR_COMPLEX_TYPES using sycl_complexT = typename exprm_ns::complex; const sycl_complexT log_in = exprm_ns::log(sycl_complexT(in)); @@ -120,7 +120,7 @@ template struct AcoshFunctor const realT wx = std::real(log_in); const realT wy = std::imag(log_in); #endif - const realT rx = std::abs(wy); + const realT rx = sycl::fabs(wy); realT ry = wx + std::log(realT(2)); acos_in = resT{rx, (std::signbit(y)) ? ry : -ry}; } @@ -145,15 +145,15 @@ template struct AcoshFunctor /* acosh(NaN + I*+-Inf) = +Inf + I*NaN */ /* acosh(+-Inf + I*NaN) = +Inf + I*NaN */ if (std::isnan(rx)) { - return resT{std::abs(ry), rx}; + return resT{sycl::fabs(ry), rx}; } /* acosh(0 + I*NaN) = NaN + I*NaN */ if (std::isnan(ry)) { return resT{ry, ry}; } /* ordinary cases */ - const realT res_im = std::copysign(rx, std::imag(in)); - return resT{std::abs(ry), res_im}; + const realT res_im = sycl::copysign(rx, std::imag(in)); + return resT{sycl::fabs(ry), res_im}; } else { static_assert(std::is_floating_point_v || diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp index 45df3b777d..f8504f8c8e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asin.hpp @@ -116,7 +116,7 @@ template struct AsinFunctor */ constexpr realT r_eps = 
realT(1) / std::numeric_limits::epsilon(); - if (std::abs(x) > r_eps || std::abs(y) > r_eps) { + if (sycl::fabs(x) > r_eps || sycl::fabs(y) > r_eps) { #ifdef USE_SYCL_FOR_COMPLEX_TYPES using sycl_complexT = exprm_ns::complex; const sycl_complexT z{x, y}; @@ -145,8 +145,8 @@ template struct AsinFunctor wy = std::imag(log_mz); } #endif - const realT asinh_re = std::copysign(wx, x); - const realT asinh_im = std::copysign(wy, y); + const realT asinh_re = sycl::copysign(wx, x); + const realT asinh_im = sycl::copysign(wy, y); return resT{asinh_im, asinh_re}; } /* ordinary cases */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp index 9ec6b02450..df9a9a6e32 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/asinh.hpp @@ -105,7 +105,7 @@ template struct AsinhFunctor constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); - if (std::abs(x) > r_eps || std::abs(y) > r_eps) { + if (sycl::fabs(x) > r_eps || sycl::fabs(y) > r_eps) { #ifdef USE_SYCL_FOR_COMPLEX_TYPES using sycl_complexT = exprm_ns::complex; sycl_complexT log_in = (std::signbit(x)) @@ -118,8 +118,8 @@ template struct AsinhFunctor realT wx = std::real(log_in) + std::log(realT(2)); realT wy = std::imag(log_in); #endif - const realT res_re = std::copysign(wx, x); - const realT res_im = std::copysign(wy, y); + const realT res_re = sycl::copysign(wx, x); + const realT res_im = sycl::copysign(wy, y); return resT{res_re, res_im}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp index 1dcbb9f219..c03aed6e7f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan.hpp @@ -84,8 +84,8 @@ template struct 
AtanFunctor if (std::isinf(y)) { const realT pi_half = std::atan(realT(1)) * 2; - const realT atanh_re = std::copysign(realT(0), x); - const realT atanh_im = std::copysign(pi_half, y); + const realT atanh_re = sycl::copysign(realT(0), x); + const realT atanh_im = sycl::copysign(pi_half, y); return resT{atanh_im, atanh_re}; } /* @@ -96,7 +96,7 @@ template struct AtanFunctor else if (std::isnan(y)) { /* atanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (std::isinf(x)) { - const realT atanh_re = std::copysign(realT(0), x); + const realT atanh_re = sycl::copysign(realT(0), x); const realT atanh_im = q_nan; return resT{atanh_im, atanh_re}; } @@ -118,11 +118,11 @@ template struct AtanFunctor */ constexpr realT r_eps = realT(1) / std::numeric_limits::epsilon(); - if (std::abs(x) > r_eps || std::abs(y) > r_eps) { + if (sycl::fabs(x) > r_eps || sycl::fabs(y) > r_eps) { const realT pi_half = std::atan(realT(1)) * 2; const realT atanh_re = realT(0); - const realT atanh_im = std::copysign(pi_half, y); + const realT atanh_im = sycl::copysign(pi_half, y); return resT{atanh_im, atanh_re}; } /* ordinary cases */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp index e65fae6ab0..3b587a69f4 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atan2.hpp @@ -58,7 +58,7 @@ template struct Atan2Functor { if (std::isinf(in2) && !std::signbit(in2)) { if (std::isfinite(in1)) { - return std::copysign(resT(0), in1); + return sycl::copysign(resT(0), in1); } } return std::atan2(in1, in2); diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp index c9f2e99610..f4a33d0548 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp +++ 
b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/atanh.hpp @@ -78,8 +78,8 @@ template struct AtanhFunctor if (std::isinf(y)) { const realT pi_half = std::atan(realT(1)) * 2; - const realT res_re = std::copysign(realT(0), x); - const realT res_im = std::copysign(pi_half, y); + const realT res_re = sycl::copysign(realT(0), x); + const realT res_im = sycl::copysign(pi_half, y); return resT{res_re, res_im}; } /* @@ -90,7 +90,7 @@ template struct AtanhFunctor else if (std::isnan(y)) { /* atanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (std::isinf(x)) { - const realT res_re = std::copysign(realT(0), x); + const realT res_re = sycl::copysign(realT(0), x); return resT{res_re, q_nan}; } /* atanh(+-0 + I*NaN) = +-0 + I*NaN */ @@ -111,11 +111,12 @@ template struct AtanhFunctor */ const realT RECIP_EPSILON = realT(1) / std::numeric_limits::epsilon(); - if (std::abs(x) > RECIP_EPSILON || std::abs(y) > RECIP_EPSILON) { + if (sycl::fabs(x) > RECIP_EPSILON || sycl::fabs(y) > RECIP_EPSILON) + { const realT pi_half = std::atan(realT(1)) * 2; const realT res_re = realT(0); - const realT res_im = std::copysign(pi_half, y); + const realT res_im = sycl::copysign(pi_half, y); return resT{res_re, res_im}; } /* ordinary cases */ diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cabs_impl.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cabs_impl.hpp new file mode 100644 index 0000000000..77930fcd35 --- /dev/null +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cabs_impl.hpp @@ -0,0 +1,84 @@ +//===------- cabs_impl.hpp - Implementation of cabs -------*-C++-*/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines an implementation of the complex absolute value. +//===----------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include "sycl_complex.hpp" + +namespace dpctl +{ +namespace tensor +{ +namespace kernels +{ +namespace detail +{ + +template realT cabs(std::complex const &z) +{ + // Special values for cabs( x + y * 1j): + // * If x is either +infinity or -infinity and y is any value + // (including NaN), the result is +infinity. + // * If x is any value (including NaN) and y is either +infinity or + // -infinity, the result is +infinity. + // * If x is either +0 or -0, the result is equal to abs(y). + // * If y is either +0 or -0, the result is equal to abs(x). + // * If x is NaN and y is a finite number, the result is NaN. + // * If x is a finite number and y is NaN, the result is NaN. + // * If x is NaN and y is NaN, the result is NaN. 
+ + const realT x = std::real(z); + const realT y = std::imag(z); + + constexpr realT q_nan = std::numeric_limits::quiet_NaN(); + constexpr realT p_inf = std::numeric_limits::infinity(); + + if (std::isinf(x)) { + return p_inf; + } + else if (std::isinf(y)) { + return p_inf; + } + else if (std::isnan(x)) { + return q_nan; + } + else if (std::isnan(y)) { + return q_nan; + } + else { +#ifdef USE_SYCL_FOR_COMPLEX_TYPES + return exprm_ns::abs(exprm_ns::complex(z)); +#else + return std::hypot(std::real(z), std::imag(z)); +#endif + } +} + +} // namespace detail +} // namespace kernels +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp index a1d00ddc6d..2fce16e84f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/ceil.hpp @@ -71,7 +71,7 @@ template struct CeilFunctor if (in == 0) { return in; } - return std::ceil(in); + return sycl::ceil(in); } } }; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp index 983674c904..8aa8d8f18f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cos.hpp @@ -109,7 +109,7 @@ template struct CosFunctor */ if (x == realT(0) && !yfinite) { const realT y_m_y = (y - y); - const realT res_im = std::copysign(realT(0), x * y_m_y); + const realT res_im = sycl::copysign(realT(0), x * y_m_y); return resT{y_m_y, res_im}; } @@ -120,7 +120,7 @@ template struct CosFunctor * The sign of 0 in the result is unspecified. 
*/ if (y == realT(0) && !xfinite) { - const realT res_im = std::copysign(realT(0), x) * y; + const realT res_im = sycl::copysign(realT(0), x) * y; return resT{x * x, res_im}; } @@ -144,7 +144,7 @@ template struct CosFunctor */ if (std::isinf(x)) { if (!yfinite) { - return resT{x * x, std::copysign(q_nan, x)}; + return resT{x * x, sycl::copysign(q_nan, x)}; } return resT{(x * x) * std::cos(y), x * std::sin(y)}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp index a7a79833ea..c73d2caa61 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/cosh.hpp @@ -99,7 +99,7 @@ template struct CoshFunctor * the same as d(NaN). */ if (x == realT(0) && !yfinite) { - const realT res_im = std::copysign(realT(0), x * q_nan); + const realT res_im = sycl::copysign(realT(0), x * q_nan); return resT{q_nan, res_im}; } @@ -110,7 +110,7 @@ template struct CoshFunctor * The sign of 0 in the result is unspecified. 
*/ if (y == realT(0) && !xfinite) { - const realT res_im = std::copysign(realT(0), x) * y; + const realT res_im = sycl::copysign(realT(0), x) * y; return resT{x * x, res_im}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp index 84bdfcba13..114a0cb417 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/expm1.hpp @@ -84,8 +84,8 @@ template struct Expm1Functor return in; } else { - return (resT{std::copysign(x, std::cos(y)), - std::copysign(x, std::sin(y))}); + return (resT{sycl::copysign(x, std::cos(y)), + sycl::copysign(x, std::sin(y))}); } } else { @@ -93,11 +93,11 @@ template struct Expm1Functor if (!std::isfinite(y)) { // copy sign of y to guarantee // conj(expm1(x)) == expm1(conj(x)) - return resT{realT(-1), std::copysign(realT(0), y)}; + return resT{realT(-1), sycl::copysign(realT(0), y)}; } else { return resT{realT(-1), - std::copysign(realT(0), std::sin(y))}; + sycl::copysign(realT(0), std::sin(y))}; } } } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp index a0d25f987b..a709787a65 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor.hpp @@ -71,7 +71,7 @@ template struct FloorFunctor if (in == 0) { return in; } - return std::floor(in); + return sycl::floor(in); } } }; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index ff6bb65daa..231e18f4da 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ 
-73,7 +73,7 @@ struct FloorDivideFunctor } else { auto div = in1 / in2; - return (div == resT(0)) ? div : resT(std::floor(div)); + return (div == resT(0)) ? div : resT(sycl::floor(div)); } } @@ -106,7 +106,7 @@ struct FloorDivideFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { if (in2[i] != argT2(0)) { - tmp[i] = std::floor(tmp[i]); + tmp[i] = sycl::floor(tmp[i]); } } if constexpr (std::is_same_v) { @@ -330,7 +330,7 @@ template struct FloorDivideInplaceFunctor if (in1 == resT(0)) { return; } - in1 = std::floor(in1); + in1 = sycl::floor(in1); } } @@ -363,7 +363,7 @@ template struct FloorDivideInplaceFunctor #pragma unroll for (int i = 0; i < vec_sz; ++i) { if (in2[i] != argT(0)) { - in1[i] = std::floor(in1[i]); + in1[i] = sycl::floor(in1[i]); } } } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp index 5d7447b2b6..4b524504fc 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/log1p.hpp @@ -81,7 +81,7 @@ template struct Log1pFunctor // imaginary part of result const realT res_im = std::atan2(y, x + 1); - if (std::max(std::abs(x), std::abs(y)) < realT{.1}) { + if (std::max(sycl::fabs(x), sycl::fabs(y)) < realT{.1}) { const realT v = x * (2 + x) + y * y; return resT{std::log1p(v) / 2, res_im}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index ad2e588a72..4276893690 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -77,7 +77,7 @@ template struct LogAddExpFunctor for (int i = 0; i < vec_sz; ++i) { if (std::isfinite(diff[i])) { res[i] = std::max(in1[i], in2[i]) + - impl_finite(-std::abs(diff[i])); + 
impl_finite(-sycl::fabs(diff[i])); } else { using dpctl::tensor::math_utils::logaddexp; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp index 1a69ba0689..866579825c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -84,7 +84,7 @@ template struct ProjFunctor private: template std::complex value_at_infinity(const T &y) const { - const T res_im = std::copysign(T(0), y); + const T res_im = sycl::copysign(T(0), y); return std::complex{std::numeric_limits::infinity(), res_im}; } }; diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index ddd919b74d..15c0d6d070 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -81,7 +81,7 @@ template struct RemainderFunctor } } else { - rem = std::copysign(resT(0), in2); + rem = sycl::copysign(resT(0), in2); } return rem; } @@ -122,7 +122,7 @@ template struct RemainderFunctor } } else { - rem[i] = std::copysign(remT(0), in2[i]); + rem[i] = sycl::copysign(remT(0), in2[i]); } } if constexpr (std::is_same_v) { diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp index b7be7c04c8..db24a2cf14 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp @@ -30,8 +30,8 @@ #include #include +#include "cabs_impl.hpp" #include "kernels/elementwise_functions/common.hpp" -#include "sycl_complex.hpp" #include "kernels/dpctl_tensor_types.hpp" #include "utils/offset_utils.hpp" @@ -79,12 +79,8 @@ template struct 
SignFunctor return resT(0); } else { -#ifdef USE_SYCL_FOR_COMPLEX_TYPES auto z = exprm_ns::complex(in); - return (z / exprm_ns::abs(z)); -#else - return in / std::abs(in); -#endif + return (z / detail::cabs(in)); } } else { diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp index 9d93ccf6ac..e6570e4891 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp @@ -86,7 +86,7 @@ template struct SinFunctor resT res = std::sin(in); #endif if (in_re == realT(0)) { - res.real(std::copysign(realT(0), in_re)); + res.real(sycl::copysign(realT(0), in_re)); } return res; } @@ -111,7 +111,7 @@ template struct SinFunctor */ if (x == realT(0) && !yfinite) { const realT sinh_im = q_nan; - const realT sinh_re = std::copysign(realT(0), x * sinh_im); + const realT sinh_re = sycl::copysign(realT(0), x * sinh_im); return resT{sinh_im, -sinh_re}; } @@ -127,7 +127,7 @@ template struct SinFunctor return resT{sinh_im, -sinh_re}; } const realT sinh_re = x; - const realT sinh_im = std::copysign(realT(0), y); + const realT sinh_im = sycl::copysign(realT(0), y); return resT{sinh_im, -sinh_re}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp index 84a5d5d49b..436fc7ed82 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp @@ -95,7 +95,7 @@ template struct SinhFunctor * the same as d(NaN). 
*/ if (x == realT(0) && !yfinite) { - const realT res_re = std::copysign(realT(0), x * (y - y)); + const realT res_re = sycl::copysign(realT(0), x * (y - y)); return resT{res_re, y - y}; } @@ -108,7 +108,7 @@ template struct SinhFunctor if (std::isnan(x)) { return resT{x, y}; } - const realT res_im = std::copysign(realT(0), y); + const realT res_im = sycl::copysign(realT(0), y); return resT{x, res_im}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp index b8fb2616e0..471f258fc2 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp @@ -130,10 +130,12 @@ template struct SqrtFunctor else if (std::isinf(x)) { // x is an infinity // y is either finite, or nan if (std::signbit(x)) { // x == -inf - return {(std::isfinite(y) ? zero : y), std::copysign(p_inf, y)}; + return {(std::isfinite(y) ? zero : y), + sycl::copysign(p_inf, y)}; } else { - return {p_inf, (std::isfinite(y) ? std::copysign(zero, y) : y)}; + return {p_inf, + (std::isfinite(y) ? sycl::copysign(zero, y) : y)}; } } else { // x is finite @@ -205,15 +207,15 @@ template struct SqrtFunctor if (std::signbit(xx)) { const realT m = std::hypot(xx, yy); const realT d = std::sqrt((m - xx) * half); - const realT res_re = (d == zero ? zero : std::abs(yy) / d * half); - const realT res_im = std::copysign(d, yy); + const realT res_re = (d == zero ? zero : sycl::fabs(yy) / d * half); + const realT res_im = sycl::copysign(d, yy); return {sycl::ldexp(res_re, sc), sycl::ldexp(res_im, sc)}; } else { const realT m = std::hypot(xx, yy); const realT d = std::sqrt((m + xx) * half); const realT res_im = - (d == zero) ? std::copysign(zero, yy) : yy * half / d; + (d == zero) ? 
sycl::copysign(zero, yy) : yy * half / d; return {sycl::ldexp(d, sc), sycl::ldexp(res_im, sc)}; } } @@ -232,15 +234,15 @@ template struct SqrtFunctor if (std::signbit(x)) { const realT m = std::hypot(x, y); const realT d = std::sqrt((m - x) * half); - const realT res_re = (d == zero ? zero : std::abs(y) / d * half); - const realT res_im = std::copysign(d, y); + const realT res_re = (d == zero ? zero : sycl::fabs(y) / d * half); + const realT res_im = sycl::copysign(d, y); return {res_re, res_im}; } else { const realT m = std::hypot(x, y); const realT d = std::sqrt((m + x) * half); const realT res_im = - (d == zero) ? std::copysign(zero, y) : y * half / d; + (d == zero) ? sycl::copysign(zero, y) : y * half / d; return {d, res_im}; } } @@ -258,7 +260,8 @@ template struct SqrtFunctor template std::complex csqrt_finite(T const &x, T const &y) const { - return (std::max(std::abs(x), std::abs(y)) < scaling_threshold()) + return (std::max(std::fabs(x), std::fabs(y)) < + scaling_threshold()) ? csqrt_finite_unscaled(x, y) : csqrt_finite_scaled(x, y); } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp index d926ccde3a..960a441371 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp @@ -100,8 +100,8 @@ template struct TanFunctor const realT tanh_im = (y == realT(0) ? y : x * y); return resT{tanh_im, -tanh_re}; } - const realT tanh_re = std::copysign(realT(1), x); - const realT tanh_im = std::copysign( + const realT tanh_re = sycl::copysign(realT(1), x); + const realT tanh_im = sycl::copysign( realT(0), std::isinf(y) ? 
y : std::sin(y) * std::cos(y)); return resT{tanh_im, -tanh_re}; } diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp index 993d733406..4e0ef989aa 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp @@ -94,8 +94,8 @@ template struct TanhFunctor if (std::isnan(x)) { return resT{q_nan, (y == realT(0) ? y : q_nan)}; } - const realT res_re = std::copysign(realT(1), x); - const realT res_im = std::copysign( + const realT res_re = sycl::copysign(realT(1), x); + const realT res_im = sycl::copysign( realT(0), std::isinf(y) ? y : std::sin(y) * std::cos(y)); return resT{res_re, res_im}; } From fc4a6129c1b7a32827f0550a7ae44a1ce15b4aa5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 2 Apr 2024 14:36:00 -0500 Subject: [PATCH 2/2] Removed work-around applied to proj The issue that was being worked around has been fixed in the 2024.1 compiler --- .../kernels/elementwise_functions/proj.hpp | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp index 866579825c..3e220b2b13 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp @@ -108,8 +108,8 @@ using ProjStridedFunctor = elementwise_common:: template struct ProjOutputType { - using value_type = typename std::disjunction< // disjunction is C++17 - // feature, supported by DPC++ + // disjunction is C++17 feature, supported by DPC++ + using value_type = typename std::disjunction< td_ns::TypeMapResultEntry>, td_ns::TypeMapResultEntry>, td_ns::DefaultResultEntry>::result_type; @@ -130,30 +130,6 @@ sycl::event proj_contig_impl(sycl::queue &exec_q, 
exec_q, nelems, arg_p, res_p, depends); } -template -sycl::event -proj_workaround_contig_impl(sycl::queue &exec_q, - size_t nelems, - const char *arg_p, - char *res_p, - const std::vector &depends = {}) -{ - using resTy = typename ProjOutputType::value_type; - - const argTy *arg_tp = reinterpret_cast(arg_p); - resTy *res_tp = reinterpret_cast(res_p); - - sycl::event e = exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - cgh.parallel_for({nelems}, [=](sycl::id<1> id) { - size_t i = id[0]; - res_tp[i] = ProjFunctor{}(arg_tp[i]); - }); - }); - - return e; -} - template struct ProjContigFactory { fnT get() @@ -165,7 +141,7 @@ template struct ProjContigFactory } else { if constexpr (std::is_same_v>) { - fnT fn = proj_workaround_contig_impl; + fnT fn = proj_contig_impl; return fn; } else {