Skip to content

Commit c90e5d5

Browse files
authored
Merge branch 'master' into elementwise-functions-log-variants
2 parents 9c6f67d + 7510d7e commit c90e5d5

File tree

11 files changed

+1722
-12
lines changed

11 files changed

+1722
-12
lines changed

dpctl/tensor/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,15 @@
118118
logical_or,
119119
logical_xor,
120120
multiply,
121+
negative,
121122
not_equal,
123+
positive,
124+
pow,
122125
proj,
123126
real,
124127
sin,
125128
sqrt,
129+
square,
126130
subtract,
127131
)
128132
from ._reduction import sum
@@ -224,12 +228,16 @@
224228
"log1p",
225229
"log2",
226230
"log10",
231+
"negative",
232+
"positive",
227233
"proj",
228234
"real",
229235
"sin",
230236
"sqrt",
237+
"square",
231238
"divide",
232239
"multiply",
240+
"pow",
233241
"subtract",
234242
"equal",
235243
"not_equal",

dpctl/tensor/_elementwise_funcs.py

Lines changed: 84 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,27 @@
761761
)
762762

763763
# U25: ==== NEGATIVE (x)
764-
# FIXME: implement U25
764+
_negative_docstring_ = """
765+
negative(x, out=None, order='K')
766+
767+
Computes the numerical negative for each element `x_i` of input array `x`.
768+
Args:
769+
x (usm_ndarray):
770+
Input array, expected to have numeric data type.
771+
out (usm_ndarray):
772+
Output array to populate. Array must have the correct
773+
shape and the expected data type.
774+
order ("C","F","A","K", optional): memory layout of the new
775+
output array, if parameter `out` is `None`.
776+
Default: "K".
777+
Return:
778+
usm_ndarray:
779+
An array containing the negative of `x`.
780+
"""
781+
782+
negative = UnaryElementwiseFunc(
783+
"negative", ti._negative_result_type, ti._negative, _negative_docstring_
784+
)
765785

766786
# B20: ==== NOT_EQUAL (x1, x2)
767787
_not_equal_docstring_ = """
@@ -793,10 +813,48 @@
793813
)
794814

795815
# U26: ==== POSITIVE (x)
796-
# FIXME: implement U26
816+
_positive_docstring_ = """
817+
positive(x, out=None, order='K')
818+
819+
Computes the numerical positive for each element `x_i` of input array `x`.
820+
Args:
821+
x (usm_ndarray):
822+
Input array, expected to have numeric data type.
823+
out (usm_ndarray):
824+
Output array to populate. Array must have the correct
825+
shape and the expected data type.
826+
order ("C","F","A","K", optional): memory layout of the new
827+
output array, if parameter `out` is `None`.
828+
Default: "K".
829+
Return:
830+
usm_ndarray:
831+
An array containing the values of `x`.
832+
"""
833+
834+
positive = UnaryElementwiseFunc(
835+
"positive", ti._positive_result_type, ti._positive, _positive_docstring_
836+
)
797837

798838
# B21: ==== POW (x1, x2)
799-
# FIXME: implement B21
839+
_pow_docstring_ = """
840+
pow(x1, x2, out=None, order='K')
841+
842+
Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array
843+
`x1` with the respective element `x2_i` of the input array `x2`.
844+
845+
Args:
846+
x1 (usm_ndarray):
847+
First input array, expected to have a numeric data type.
848+
x2 (usm_ndarray):
849+
Second input array, also expected to have a numeric data type.
850+
Returns:
851+
usm_ndarray:
852+
an array containing the element-wise result. The data type of
853+
the returned array is determined by the Type Promotion Rules.
854+
"""
855+
pow = BinaryElementwiseFunc(
856+
"pow", ti._pow_result_type, ti._pow, _pow_docstring_
857+
)
800858

801859
# U??: ==== PROJ (x)
802860
_proj_docstring = """
@@ -884,7 +942,29 @@
884942
# FIXME: implement U31
885943

886944
# U32: ==== SQUARE (x)
887-
# FIXME: implement U32
945+
_square_docstring_ = """
946+
square(x, out=None, order='K')
947+
948+
Computes `x_i**2` (or `x_i*x_i`) for each element `x_i` of input array `x`.
949+
Args:
950+
x (usm_ndarray):
951+
Input array, expected to have numeric data type.
952+
out ({None, usm_ndarray}, optional):
953+
Output array to populate.
954+
Array have the correct shape and the expected data type.
955+
order ("C","F","A","K", optional):
956+
Memory layout of the newly output array, if parameter `out` is `None`.
957+
Default: "K".
958+
Returns:
959+
usm_ndarray:
960+
An array containing the square `x`.
961+
The data type of the returned array is determined by
962+
the Type Promotion Rules.
963+
"""
964+
965+
square = UnaryElementwiseFunc(
966+
"square", ti._square_result_type, ti._square, _square_docstring_
967+
)
888968

889969
# U33: ==== SQRT (x)
890970
_sqrt_docstring_ = """
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
//=== negative.hpp - Unary function POSITIVE ------ *-C++-*--/===//
2+
//
3+
// Data Parallel Control (dpctl)
4+
//
5+
// Copyright 2020-2023 Intel Corporation
6+
//
7+
// Licensed under the Apache License, Version 2.0 (the "License");
8+
// you may not use this file except in compliance with the License.
9+
// You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing, software
14+
// distributed under the License is distributed on an "AS IS" BASIS,
15+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
// See the License for the specific language governing permissions and
17+
// limitations under the License.
18+
//
19+
//===---------------------------------------------------------------------===//
20+
///
21+
/// \file
22+
/// This file defines kernels for elementwise evaluation of POSITIVE(x)
23+
/// function that returns x.
24+
//===---------------------------------------------------------------------===//
25+
26+
#pragma once
27+
#include <CL/sycl.hpp>
28+
#include <cmath>
29+
#include <cstddef>
30+
#include <cstdint>
31+
#include <type_traits>
32+
33+
#include "kernels/elementwise_functions/common.hpp"
34+
35+
#include "utils/offset_utils.hpp"
36+
#include "utils/type_dispatch.hpp"
37+
#include "utils/type_utils.hpp"
38+
#include <pybind11/pybind11.h>
39+
40+
#include <iostream>
41+
42+
namespace dpctl
43+
{
44+
namespace tensor
45+
{
46+
namespace kernels
47+
{
48+
namespace negative
49+
{
50+
51+
namespace py = pybind11;
52+
namespace td_ns = dpctl::tensor::type_dispatch;
53+
54+
using dpctl::tensor::type_utils::is_complex;
55+
using dpctl::tensor::type_utils::vec_cast;
56+
57+
template <typename argT, typename resT> struct NegativeFunctor
58+
{
59+
60+
using is_constant = typename std::false_type;
61+
// constexpr resT constant_value = resT{};
62+
using supports_vec = typename std::false_type;
63+
using supports_sg_loadstore = typename std::negation<
64+
std::disjunction<is_complex<resT>, is_complex<argT>>>;
65+
66+
resT operator()(const argT &x)
67+
{
68+
return -x;
69+
}
70+
};
71+
72+
template <typename argT,
73+
typename resT = argT,
74+
unsigned int vec_sz = 4,
75+
unsigned int n_vecs = 2>
76+
using NegativeContigFunctor = elementwise_common::
77+
UnaryContigFunctor<argT, resT, NegativeFunctor<argT, resT>, vec_sz, n_vecs>;
78+
79+
template <typename T> struct NegativeOutputType
80+
{
81+
using value_type = typename std::disjunction< // disjunction is C++17
82+
// feature, supported by DPC++
83+
td_ns::TypeMapResultEntry<T, std::uint8_t>,
84+
td_ns::TypeMapResultEntry<T, std::uint16_t>,
85+
td_ns::TypeMapResultEntry<T, std::uint32_t>,
86+
td_ns::TypeMapResultEntry<T, std::uint64_t>,
87+
td_ns::TypeMapResultEntry<T, std::int8_t>,
88+
td_ns::TypeMapResultEntry<T, std::int16_t>,
89+
td_ns::TypeMapResultEntry<T, std::int32_t>,
90+
td_ns::TypeMapResultEntry<T, std::int64_t>,
91+
td_ns::TypeMapResultEntry<T, sycl::half>,
92+
td_ns::TypeMapResultEntry<T, float>,
93+
td_ns::TypeMapResultEntry<T, double>,
94+
td_ns::TypeMapResultEntry<T, std::complex<float>>,
95+
td_ns::TypeMapResultEntry<T, std::complex<double>>,
96+
td_ns::DefaultResultEntry<void>>::result_type;
97+
};
98+
99+
template <typename T1, typename T2, unsigned int vec_sz, unsigned int n_vecs>
100+
class negative_contig_kernel;
101+
102+
typedef sycl::event (*negative_contig_impl_fn_ptr_t)(
103+
sycl::queue,
104+
size_t,
105+
const char *,
106+
char *,
107+
const std::vector<sycl::event> &);
108+
109+
template <typename argTy>
110+
sycl::event negative_contig_impl(sycl::queue exec_q,
111+
size_t nelems,
112+
const char *arg_p,
113+
char *res_p,
114+
const std::vector<sycl::event> &depends = {})
115+
{
116+
sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
117+
cgh.depends_on(depends);
118+
119+
size_t lws = 64;
120+
constexpr unsigned int vec_sz = 4;
121+
constexpr unsigned int n_vecs = 2;
122+
const size_t n_groups =
123+
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
124+
const auto gws_range = sycl::range<1>(n_groups * lws);
125+
const auto lws_range = sycl::range<1>(lws);
126+
127+
using resTy = typename NegativeOutputType<argTy>::value_type;
128+
const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
129+
resTy *res_tp = reinterpret_cast<resTy *>(res_p);
130+
131+
cgh.parallel_for<negative_contig_kernel<argTy, resTy, vec_sz, n_vecs>>(
132+
sycl::nd_range<1>(gws_range, lws_range),
133+
NegativeContigFunctor<argTy, resTy, vec_sz, n_vecs>(arg_tp, res_tp,
134+
nelems));
135+
});
136+
return negative_ev;
137+
}
138+
139+
template <typename fnT, typename T> struct NegativeContigFactory
140+
{
141+
fnT get()
142+
{
143+
if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
144+
void>) {
145+
fnT fn = nullptr;
146+
return fn;
147+
}
148+
else {
149+
fnT fn = negative_contig_impl<T>;
150+
return fn;
151+
}
152+
}
153+
};
154+
155+
template <typename fnT, typename T> struct NegativeTypeMapFactory
156+
{
157+
/*! @brief get typeid for output type of std::negative(T x) */
158+
std::enable_if_t<std::is_same<fnT, int>::value, int> get()
159+
{
160+
using rT = typename NegativeOutputType<T>::value_type;
161+
;
162+
return td_ns::GetTypeid<rT>{}.get();
163+
}
164+
};
165+
166+
template <typename argTy, typename resTy, typename IndexerT>
167+
using NegativeStridedFunctor = elementwise_common::
168+
UnaryStridedFunctor<argTy, resTy, IndexerT, NegativeFunctor<argTy, resTy>>;
169+
170+
template <typename T1, typename T2, typename T3> class negative_strided_kernel;
171+
172+
typedef sycl::event (*negative_strided_impl_fn_ptr_t)(
173+
sycl::queue,
174+
size_t,
175+
int,
176+
const py::ssize_t *,
177+
const char *,
178+
py::ssize_t,
179+
char *,
180+
py::ssize_t,
181+
const std::vector<sycl::event> &,
182+
const std::vector<sycl::event> &);
183+
184+
template <typename argTy>
185+
sycl::event
186+
negative_strided_impl(sycl::queue exec_q,
187+
size_t nelems,
188+
int nd,
189+
const py::ssize_t *shape_and_strides,
190+
const char *arg_p,
191+
py::ssize_t arg_offset,
192+
char *res_p,
193+
py::ssize_t res_offset,
194+
const std::vector<sycl::event> &depends,
195+
const std::vector<sycl::event> &additional_depends)
196+
{
197+
sycl::event negative_ev = exec_q.submit([&](sycl::handler &cgh) {
198+
cgh.depends_on(depends);
199+
cgh.depends_on(additional_depends);
200+
201+
using resTy = typename NegativeOutputType<argTy>::value_type;
202+
using IndexerT =
203+
typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
204+
205+
IndexerT indexer{nd, arg_offset, res_offset, shape_and_strides};
206+
207+
const argTy *arg_tp = reinterpret_cast<const argTy *>(arg_p);
208+
resTy *res_tp = reinterpret_cast<resTy *>(res_p);
209+
210+
cgh.parallel_for<negative_strided_kernel<argTy, resTy, IndexerT>>(
211+
{nelems}, NegativeStridedFunctor<argTy, resTy, IndexerT>(
212+
arg_tp, res_tp, indexer));
213+
});
214+
return negative_ev;
215+
}
216+
217+
template <typename fnT, typename T> struct NegativeStridedFactory
218+
{
219+
fnT get()
220+
{
221+
if constexpr (std::is_same_v<typename NegativeOutputType<T>::value_type,
222+
void>) {
223+
fnT fn = nullptr;
224+
return fn;
225+
}
226+
else {
227+
fnT fn = negative_strided_impl<T>;
228+
return fn;
229+
}
230+
}
231+
};
232+
233+
} // namespace negative
234+
} // namespace kernels
235+
} // namespace tensor
236+
} // namespace dpctl

0 commit comments

Comments
 (0)