Skip to content

Commit beebaf0

Browse files
authored
Merge branch 'master' into impl-array_namespace_info
2 parents 702558b + 0181564 commit beebaf0

File tree

6 files changed

+541
-51
lines changed

6 files changed

+541
-51
lines changed

dpnp/dpnp_iface_histograms.py

Lines changed: 198 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,25 @@
4242

4343
import dpctl.utils as dpu
4444
import numpy
45-
from dpctl.tensor._type_utils import _can_cast
4645

4746
import dpnp
4847

4948
# pylint: disable=no-name-in-module
5049
import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext
50+
from dpnp.dpnp_utils.dpnp_utils_common import (
51+
result_type_for_device,
52+
to_supported_dtypes,
53+
)
5154

5255
# pylint: disable=no-name-in-module
53-
from .dpnp_utils import get_usm_allocations, map_dtype_to_device
56+
from .dpnp_utils import get_usm_allocations
5457

5558
__all__ = [
5659
"bincount",
5760
"digitize",
5861
"histogram",
5962
"histogram_bin_edges",
63+
"histogram2d",
6064
"histogramdd",
6165
]
6266

@@ -65,33 +69,15 @@
6569
_range = range
6670

6771

68-
def _result_type_for_device(dtypes, device):
69-
rt = dpnp.result_type(*dtypes)
70-
return map_dtype_to_device(rt, device)
71-
72-
7372
def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device):
74-
has_fp64 = device.has_aspect_fp64
75-
has_fp16 = device.has_aspect_fp16
76-
77-
a_bin_dtype = _result_type_for_device([a_dtype, bins_dtype], device)
73+
a_bin_dtype = result_type_for_device([a_dtype, bins_dtype], device)
7874

7975
# histogram implementation doesn't support uint64 as histogram type
8076
# we can use int64 instead. Result would be correct even in case of overflow
8177
if ntype == numpy.uint64:
8278
ntype = dpnp.int64
8379

84-
if (a_bin_dtype, ntype) in supported_types:
85-
return a_bin_dtype, ntype
86-
87-
for sample_type, hist_type in supported_types:
88-
if _can_cast(
89-
a_bin_dtype, sample_type, has_fp16, has_fp64
90-
) and _can_cast(ntype, hist_type, has_fp16, has_fp64):
91-
return sample_type, hist_type
92-
93-
# should not happen
94-
return None, None # pragma: no cover
80+
return to_supported_dtypes([a_bin_dtype, ntype], supported_types, device)
9581

9682

9783
def _ravel_check_a_and_weights(a, weights):
@@ -138,6 +124,9 @@ def _is_finite(a):
138124
return numpy.isfinite(a)
139125

140126
if range is not None:
127+
if len(range) != 2:
128+
raise ValueError("range argument must consist of 2 elements.")
129+
141130
first_edge, last_edge = range
142131
if first_edge > last_edge:
143132
raise ValueError("max must be larger than min in range parameter.")
@@ -520,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
520509
If `bins` is a sequence, it defines a monotonically increasing array
521510
of bin edges, including the rightmost edge, allowing for non-uniform
522511
bin widths.
512+
523513
Default: ``10``.
524514
range : {None, 2-tuple of float}, optional
525515
The lower and upper range of the bins. If not provided, range is simply
@@ -528,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
528518
affects the automatic bin computation as well. While bin width is
529519
computed to be optimal based on the actual data within `range`, the bin
530520
count will fill the entire range including portions containing no data.
521+
531522
Default: ``None``.
532523
density : {None, bool}, optional
533524
If ``False`` or ``None``, the result will contain the number of samples
@@ -536,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
536527
the range is ``1``. Note that the sum of the histogram values will not
537528
be equal to ``1`` unless bins of unity width are chosen; it is not
538529
a probability *mass* function.
530+
539531
Default: ``None``.
540532
weights : {None, dpnp.ndarray, usm_ndarray}, optional
541533
An array of weights, of the same shape as `a`. Each value in `a` only
@@ -545,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
545537
Please note that the ``dtype`` of `weights` will also become the
546538
``dtype`` of the returned accumulator (`hist`), so it must be large
547539
enough to hold accumulated values as well.
540+
548541
Default: ``None``.
549542
550543
Returns
@@ -751,6 +744,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
751744
return bin_edges
752745

753746

747+
def histogram2d(x, y, bins=10, range=None, density=None, weights=None):
748+
"""
749+
Compute the bi-dimensional histogram of two data samples.
750+
751+
Parameters
752+
----------
753+
x : {dpnp.ndarray, usm_ndarray} of shape (N,)
754+
An array containing the `x` coordinates of the points to be
755+
histogrammed.
756+
y : {dpnp.ndarray, usm_ndarray} of shape (N,)
757+
An array containing the `y` coordinates of the points to be
758+
histogrammed.
759+
bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
760+
[int, array], [array, int]}, optional
761+
762+
The bins specification:
763+
764+
* If int, the number of bins for the two dimensions (nx=ny=bins).
765+
* If array, the bin edges for the two dimensions
766+
(x_edges=y_edges=bins).
767+
* If [int, int], the number of bins in each dimension
768+
(nx, ny = bins).
769+
* If [array, array], the bin edges in each dimension
770+
(x_edges, y_edges = bins).
771+
* A combination [int, array] or [array, int], where int
772+
is the number of bins and array is the bin edges.
773+
774+
Default: ``10``.
775+
range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
776+
The leftmost and rightmost edges of the bins along each dimension.
777+
If ``None`` the ranges are
778+
``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside
779+
of this range will be considered outliers and not tallied in the
780+
histogram.
781+
782+
Default: ``None``.
783+
density : {None, bool}, optional
784+
If ``False`` or ``None``, the default, returns the number of
785+
samples in each bin.
786+
If ``True``, returns the probability *density* function at the bin,
787+
``bin_count / sample_count / bin_volume``.
788+
789+
Default: ``None``.
790+
weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
791+
An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
792+
Weights are normalized to ``1`` if `density` is ``True``.
793+
If `density` is ``False``, the values of the returned histogram
794+
are equal to the sum of the weights belonging to the samples
795+
falling into each bin.
796+
If ``None`` all samples are assigned a weight of ``1``.
797+
798+
Default: ``None``.
799+
Returns
800+
-------
801+
H : dpnp.ndarray of shape (nx, ny)
802+
The bi-dimensional histogram of samples `x` and `y`. Values in `x`
803+
are histogrammed along the first dimension and values in `y` are
804+
histogrammed along the second dimension.
805+
xedges : dpnp.ndarray of shape (nx+1,)
806+
The bin edges along the first dimension.
807+
yedges : dpnp.ndarray of shape (ny+1,)
808+
The bin edges along the second dimension.
809+
810+
See Also
811+
--------
812+
:obj:`dpnp.histogram` : 1D histogram
813+
:obj:`dpnp.histogramdd` : Multidimensional histogram
814+
815+
Notes
816+
-----
817+
When `density` is ``True``, then the returned histogram is the sample
818+
density, defined such that the sum over bins of the product
819+
``bin_value * bin_area`` is 1.
820+
821+
Please note that the histogram does not follow the Cartesian convention
822+
where `x` values are on the abscissa and `y` values on the ordinate
823+
axis. Rather, `x` is histogrammed along the first dimension of the
824+
array (vertical), and `y` along the second dimension of the array
825+
(horizontal). This ensures compatibility with `histogramdd`.
826+
827+
Examples
828+
--------
829+
>>> import dpnp as np
830+
>>> x = np.random.randn(20).astype("float32")
831+
>>> y = np.random.randn(20).astype("float32")
832+
>>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
833+
>>> hist.shape
834+
(4, 3)
835+
>>> hist
836+
array([[1., 2., 0.],
837+
[0., 3., 1.],
838+
[1., 4., 1.],
839+
[1., 3., 3.]], dtype=float32)
840+
>>> edges_x.shape
841+
(5,)
842+
>>> edges_x
843+
array([-1.7516936 , -0.96109843, -0.17050326, 0.62009203, 1.4106871 ],
844+
dtype=float32)
845+
>>> edges_y.shape
846+
(4,)
847+
>>> edges_y
848+
array([-2.6604428 , -0.94615364, 0.76813555, 2.4824247 ], dtype=float32)
849+
850+
Please note that the resulting values of the histogram and edges may vary.
851+
852+
"""
853+
854+
dpnp.check_supported_arrays_type(x, y)
855+
if weights is not None:
856+
dpnp.check_supported_arrays_type(weights)
857+
858+
if x.ndim != 1 or y.ndim != 1:
859+
raise ValueError(
860+
f"x and y must be 1-dimensional arrays."
861+
f"Got {x.ndim} and {y.ndim} respectively"
862+
)
863+
864+
if len(x) != len(y):
865+
raise ValueError(
866+
f"x and y must have the same length."
867+
f"Got {len(x)} and {len(y)} respectively"
868+
)
869+
870+
usm_type, exec_q = get_usm_allocations([x, y, bins, range, weights])
871+
device = exec_q.sycl_device
872+
873+
sample_dtype = result_type_for_device([x.dtype, y.dtype], device)
874+
875+
# Unlike histogramdd histogram2d accepts 1d bins and
876+
# apply it to both dimensions
877+
# at the same moment two elements bins should be interpreted as
878+
# number of bins in each dimension and array-like bins with one element
879+
# is not allowed
880+
if isinstance(bins, Iterable) and len(bins) > 2:
881+
bins = [bins] * 2
882+
883+
bins = _histdd_normalize_bins(bins, 2)
884+
bins_dtypes = [sample_dtype]
885+
bins_dtypes += [b.dtype for b in bins if hasattr(b, "dtype")]
886+
887+
bins_dtype = result_type_for_device(bins_dtypes, device)
888+
hist_dtype = _histdd_hist_dtype(exec_q, weights)
889+
890+
supported_types = statistics_ext.histogramdd_dtypes()
891+
892+
sample_dtype, _ = _align_dtypes(
893+
sample_dtype, bins_dtype, hist_dtype, supported_types, device
894+
)
895+
896+
sample = dpnp.empty_like(
897+
x, shape=x.shape + (2,), dtype=sample_dtype, usm_type=usm_type
898+
)
899+
sample[:, 0] = x
900+
sample[:, 1] = y
901+
902+
hist, edges = histogramdd(
903+
sample, bins=bins, range=range, density=density, weights=weights
904+
)
905+
return hist, edges[0], edges[1]
906+
907+
754908
def _histdd_validate_bins(bins):
755909
for i, b in enumerate(bins):
756910
if numpy.ndim(b) == 0:
@@ -873,9 +1027,7 @@ def _histdd_hist_dtype(queue, weights):
8731027
# hist_dtype is either float or complex, so it is ok
8741028
# to calculate it as result type between default_float and
8751029
# weights.dtype
876-
hist_dtype = _result_type_for_device(
877-
[hist_dtype, weights.dtype], device
878-
)
1030+
hist_dtype = result_type_for_device([hist_dtype, weights.dtype], device)
8791031

8801032
return hist_dtype
8811033

@@ -886,7 +1038,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list):
8861038
dtypes_ = [bin_edges.dtype for bin_edges in bin_edges_list]
8871039
dtypes_.append(sample.dtype)
8881040

889-
return _result_type_for_device(dtypes_, device)
1041+
return result_type_for_device(dtypes_, device)
8901042

8911043

8921044
def _histdd_supported_dtypes(sample, bin_edges_list, weights):
@@ -918,7 +1070,7 @@ def _histdd_extract_arrays(sample, weights, bins):
9181070
return all_arrays
9191071

9201072

921-
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
1073+
def histogramdd(sample, bins=10, range=None, density=None, weights=None):
9221074
"""
9231075
Compute the multidimensional histogram of some data.
9241076
@@ -936,30 +1088,33 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9361088
* The number of bins for each dimension (nx, ny, ... =bins)
9371089
* The number of bins for all dimensions (nx=ny=...=bins).
9381090
939-
Default: ``10``
1091+
Default: ``10``.
9401092
range : {None, sequence}, optional
9411093
A sequence of length D, each an optional (lower, upper) tuple giving
9421094
the outer bin edges to be used if the edges are not given explicitly in
9431095
`bins`.
944-
An entry of None in the sequence results in the minimum and maximum
1096+
An entry of ``None`` in the sequence results in the minimum and maximum
9451097
values being used for the corresponding dimension.
946-
None is equivalent to passing a tuple of D None values.
947-
948-
Default: ``None``
949-
weights : {dpnp.ndarray, usm_ndarray}, optional
950-
An (N,)-shaped array of values `w_i` weighing each sample
951-
`(x_i, y_i, z_i, ...)`.
952-
Weights are normalized to 1 if density is True. If density is False,
953-
the values of the returned histogram are equal to the sum of the
954-
weights belonging to the samples falling into each bin.
1098+
``None`` is equivalent to passing a tuple of D ``None`` values.
9551099
956-
Default: ``None``
957-
density : bool, optional
958-
If ``False``, the default, returns the number of samples in each bin.
1100+
Default: ``None``.
1101+
density : {None, bool}, optional
1102+
If ``False`` or ``None``, the default, returns the number of
1103+
samples in each bin.
9591104
If ``True``, returns the probability *density* function at the bin,
9601105
``bin_count / sample_count / bin_volume``.
9611106
962-
Default: ``False``
1107+
Default: ``None``.
1108+
weights : {None, dpnp.ndarray, usm_ndarray}, optional
1109+
An (N,)-shaped array of values `w_i` weighing each sample
1110+
`(x_i, y_i, z_i, ...)`.
1111+
Weights are normalized to ``1`` if density is ``True``.
1112+
If density is ``False``, the values of the returned histogram
1113+
are equal to the sum of the weights belonging to the samples
1114+
falling into each bin.
1115+
If ``None`` all samples are assigned a weight of ``1``.
1116+
1117+
Default: ``None``.
9631118
9641119
Returns
9651120
-------
@@ -993,7 +1148,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
9931148
elif sample.ndim > 2:
9941149
raise ValueError("sample must have no more than 2 dimensions")
9951150

996-
ndim = sample.shape[1] if sample.size > 0 else 1
1151+
ndim = sample.shape[1]
9971152

9981153
_arrays = _histdd_extract_arrays(sample, weights, bins)
9991154
usm_type, queue = get_usm_allocations(_arrays)

0 commit comments

Comments
 (0)