Skip to content

Commit 9ee16d6

Browse files
committed
Implements copysign and exp2 elementwise funcs
1 parent 0b5f940 commit 9ee16d6

File tree

5 files changed

+648
-2
lines changed

5 files changed

+648
-2
lines changed

dpctl/tensor/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,13 @@
113113
cbrt,
114114
ceil,
115115
conj,
116+
copysign,
116117
cos,
117118
cosh,
118119
divide,
119120
equal,
120121
exp,
122+
exp2,
121123
expm1,
122124
floor,
123125
floor_divide,
@@ -316,4 +318,6 @@
316318
"argmin",
317319
"prod",
318320
"cbrt",
321+
"exp2",
322+
"copysign",
319323
]

dpctl/tensor/_elementwise_funcs.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1788,3 +1788,62 @@
17881788
cbrt = UnaryElementwiseFunc(
17891789
"cbrt", ti._cbrt_result_type, ti._cbrt, _cbrt_docstring_
17901790
)
1791+
1792+
1793+
# U34: ==== EXP2 (x)
1794+
_exp2_docstring_ = """
1795+
exp2(x, out=None, order='K')
1796+
1797+
Computes the base-2 exponential for each element `x_i` for input array `x`.
1798+
1799+
Args:
1800+
x (usm_ndarray):
1801+
Input array, expected to have a floating-point data type.
1802+
out ({None, usm_ndarray}, optional):
1803+
Output array to populate.
1804+
Array must have the correct shape and the expected data type.
1805+
order ("C","F","A","K", optional):
1806+
Memory layout of the new output array, if parameter `out` is `None`.
1807+
Default: "K".
1808+
Returns:
1809+
usm_ndarray:
1810+
An array containing the element-wise base-2 exponentials.
1811+
The data type of the returned array is determined by
1812+
the Type Promotion Rules.
1813+
"""
1814+
1815+
exp2 = UnaryElementwiseFunc(
1816+
"exp2", ti._exp2_result_type, ti._exp2, _exp2_docstring_
1817+
)
1818+
1819+
1820+
# B23: ==== COPYSIGN (x1, x2)
1821+
_copysign_docstring_ = """
1822+
copysign(x1, x2, out=None, order='K')
1823+
1824+
Composes a floating-point value with the magnitude of `x1_i` and the sign of
1825+
`x2_i` for each element of input arrays `x1` and `x2`.
1826+
1827+
Args:
1828+
x1 (usm_ndarray):
1829+
First input array, expected to have a real floating-point data type.
1830+
x2 (usm_ndarray):
1831+
Second input array, also expected to have a real floating-point data
1832+
type.
1833+
out ({None, usm_ndarray}, optional):
1834+
Output array to populate.
1835+
Array must have the correct shape and the expected data type.
1836+
order ("C","F","A","K", optional):
1837+
Memory layout of the new output array, if parameter `out` is `None`.
1838+
Default: "K".
1839+
Returns:
1840+
usm_ndarray:
1841+
An array containing the element-wise results. The data type
1842+
of the returned array is determined by the Type Promotion Rules.
1843+
"""
1844+
copysign = BinaryElementwiseFunc(
1845+
"copysign",
1846+
ti._copysign_result_type,
1847+
ti._copysign,
1848+
_copysign_docstring_,
1849+
)
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
//=== copysign.hpp - Binary function COPYSIGN ------ *-C++-*--/===//
2+
//
3+
// Data Parallel Control (dpctl)
4+
//
5+
// Copyright 2020-2023 Intel Corporation
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing, software
14+
// distributed under the License is distributed on an "AS IS" BASIS,
15+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
// See the License for the specific language governing permissions and
17+
// limitations under the License.
18+
//
19+
//===---------------------------------------------------------------------===//
20+
///
21+
/// \file
22+
/// This file defines kernels for elementwise evaluation of COPYSIGN(x1, x2)
23+
/// function.
24+
//===---------------------------------------------------------------------===//
25+
26+
#pragma once
27+
#include <CL/sycl.hpp>
28+
#include <cstddef>
29+
#include <cstdint>
30+
#include <type_traits>
31+
32+
#include "utils/offset_utils.hpp"
33+
#include "utils/type_dispatch.hpp"
34+
#include "utils/type_utils.hpp"
35+
36+
#include "kernels/elementwise_functions/common.hpp"
37+
#include <pybind11/pybind11.h>
38+
39+
namespace dpctl
40+
{
41+
namespace tensor
42+
{
43+
namespace kernels
44+
{
45+
namespace copysign
46+
{
47+
48+
namespace py = pybind11;
49+
namespace td_ns = dpctl::tensor::type_dispatch;
50+
namespace tu_ns = dpctl::tensor::type_utils;
51+
52+
template <typename argT1, typename argT2, typename resT> struct CopysignFunctor
53+
{
54+
55+
using supports_sg_loadstore = std::true_type;
56+
using supports_vec = std::true_type;
57+
58+
resT operator()(const argT1 &in1, const argT2 &in2) const
59+
{
60+
return sycl::copysign(in1, in2);
61+
}
62+
63+
template <int vec_sz>
64+
sycl::vec<resT, vec_sz>
65+
operator()(const sycl::vec<argT1, vec_sz> &in1,
66+
const sycl::vec<argT2, vec_sz> &in2) const
67+
{
68+
auto tmp = sycl::copysign(in1, in2);
69+
if constexpr (std::is_same_v<resT,
70+
typename decltype(tmp)::element_type>) {
71+
return tmp;
72+
}
73+
else {
74+
using dpctl::tensor::type_utils::vec_cast;
75+
76+
return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
77+
tmp);
78+
}
79+
}
80+
};
81+
82+
template <typename argT1,
83+
typename argT2,
84+
typename resT,
85+
unsigned int vec_sz = 4,
86+
unsigned int n_vecs = 2>
87+
using CopysignContigFunctor =
88+
elementwise_common::BinaryContigFunctor<argT1,
89+
argT2,
90+
resT,
91+
CopysignFunctor<argT1, argT2, resT>,
92+
vec_sz,
93+
n_vecs>;
94+
95+
template <typename argT1, typename argT2, typename resT, typename IndexerT>
96+
using CopysignStridedFunctor = elementwise_common::BinaryStridedFunctor<
97+
argT1,
98+
argT2,
99+
resT,
100+
IndexerT,
101+
CopysignFunctor<argT1, argT2, resT>>;
102+
103+
template <typename T1, typename T2> struct CopysignOutputType
104+
{
105+
using value_type = typename std::disjunction< // disjunction is C++17
106+
// feature, supported by DPC++
107+
td_ns::BinaryTypeMapResultEntry<T1,
108+
sycl::half,
109+
T2,
110+
sycl::half,
111+
sycl::half>,
112+
td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
113+
td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
114+
td_ns::DefaultResultEntry<void>>::result_type;
115+
};
116+
117+
template <typename argT1,
118+
typename argT2,
119+
typename resT,
120+
unsigned int vec_sz,
121+
unsigned int n_vecs>
122+
class copysign_contig_kernel;
123+
124+
template <typename argTy1, typename argTy2>
125+
sycl::event copysign_contig_impl(sycl::queue &exec_q,
126+
size_t nelems,
127+
const char *arg1_p,
128+
py::ssize_t arg1_offset,
129+
const char *arg2_p,
130+
py::ssize_t arg2_offset,
131+
char *res_p,
132+
py::ssize_t res_offset,
133+
const std::vector<sycl::event> &depends = {})
134+
{
135+
return elementwise_common::binary_contig_impl<
136+
argTy1, argTy2, CopysignOutputType, CopysignContigFunctor,
137+
copysign_contig_kernel>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
138+
arg2_offset, res_p, res_offset, depends);
139+
}
140+
141+
template <typename fnT, typename T1, typename T2> struct CopysignContigFactory
142+
{
143+
fnT get()
144+
{
145+
if constexpr (std::is_same_v<
146+
typename CopysignOutputType<T1, T2>::value_type,
147+
void>)
148+
{
149+
fnT fn = nullptr;
150+
return fn;
151+
}
152+
else {
153+
fnT fn = copysign_contig_impl<T1, T2>;
154+
return fn;
155+
}
156+
}
157+
};
158+
159+
template <typename fnT, typename T1, typename T2> struct CopysignTypeMapFactory
160+
{
161+
/*! @brief get typeid for output type of copysign(T1 x, T2 y) */
162+
std::enable_if_t<std::is_same<fnT, int>::value, int> get()
163+
{
164+
using rT = typename CopysignOutputType<T1, T2>::value_type;
165+
return td_ns::GetTypeid<rT>{}.get();
166+
}
167+
};
168+
169+
template <typename T1, typename T2, typename resT, typename IndexerT>
170+
class copysign_strided_kernel;
171+
172+
template <typename argTy1, typename argTy2>
173+
sycl::event
174+
copysign_strided_impl(sycl::queue &exec_q,
175+
size_t nelems,
176+
int nd,
177+
const py::ssize_t *shape_and_strides,
178+
const char *arg1_p,
179+
py::ssize_t arg1_offset,
180+
const char *arg2_p,
181+
py::ssize_t arg2_offset,
182+
char *res_p,
183+
py::ssize_t res_offset,
184+
const std::vector<sycl::event> &depends,
185+
const std::vector<sycl::event> &additional_depends)
186+
{
187+
return elementwise_common::binary_strided_impl<
188+
argTy1, argTy2, CopysignOutputType, CopysignStridedFunctor,
189+
copysign_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p,
190+
arg1_offset, arg2_p, arg2_offset, res_p,
191+
res_offset, depends, additional_depends);
192+
}
193+
194+
template <typename fnT, typename T1, typename T2> struct CopysignStridedFactory
195+
{
196+
fnT get()
197+
{
198+
if constexpr (std::is_same_v<
199+
typename CopysignOutputType<T1, T2>::value_type,
200+
void>)
201+
{
202+
fnT fn = nullptr;
203+
return fn;
204+
}
205+
else {
206+
fnT fn = copysign_strided_impl<T1, T2>;
207+
return fn;
208+
}
209+
}
210+
};
211+
212+
} // namespace copysign
213+
} // namespace kernels
214+
} // namespace tensor
215+
} // namespace dpctl

0 commit comments

Comments
 (0)