42
42
43
43
import dpctl .utils as dpu
44
44
import numpy
45
- from dpctl .tensor ._type_utils import _can_cast
46
45
47
46
import dpnp
48
47
49
48
# pylint: disable=no-name-in-module
50
49
import dpnp .backend .extensions .statistics ._statistics_impl as statistics_ext
50
+ from dpnp .dpnp_utils .dpnp_utils_common import (
51
+ result_type_for_device ,
52
+ to_supported_dtypes ,
53
+ )
51
54
52
55
# pylint: disable=no-name-in-module
53
- from .dpnp_utils import get_usm_allocations , map_dtype_to_device
56
+ from .dpnp_utils import get_usm_allocations
54
57
55
58
__all__ = [
56
59
"bincount" ,
57
60
"digitize" ,
58
61
"histogram" ,
59
62
"histogram_bin_edges" ,
63
+ "histogram2d" ,
60
64
"histogramdd" ,
61
65
]
62
66
65
69
_range = range
66
70
67
71
68
- def _result_type_for_device (dtypes , device ):
69
- rt = dpnp .result_type (* dtypes )
70
- return map_dtype_to_device (rt , device )
71
-
72
-
73
72
def _align_dtypes (a_dtype , bins_dtype , ntype , supported_types , device ):
74
- has_fp64 = device .has_aspect_fp64
75
- has_fp16 = device .has_aspect_fp16
76
-
77
- a_bin_dtype = _result_type_for_device ([a_dtype , bins_dtype ], device )
73
+ a_bin_dtype = result_type_for_device ([a_dtype , bins_dtype ], device )
78
74
79
75
# histogram implementation doesn't support uint64 as histogram type
80
76
# we can use int64 instead. Result would be correct even in case of overflow
81
77
if ntype == numpy .uint64 :
82
78
ntype = dpnp .int64
83
79
84
- if (a_bin_dtype , ntype ) in supported_types :
85
- return a_bin_dtype , ntype
86
-
87
- for sample_type , hist_type in supported_types :
88
- if _can_cast (
89
- a_bin_dtype , sample_type , has_fp16 , has_fp64
90
- ) and _can_cast (ntype , hist_type , has_fp16 , has_fp64 ):
91
- return sample_type , hist_type
92
-
93
- # should not happen
94
- return None , None # pragma: no cover
80
+ return to_supported_dtypes ([a_bin_dtype , ntype ], supported_types , device )
95
81
96
82
97
83
def _ravel_check_a_and_weights (a , weights ):
@@ -138,6 +124,9 @@ def _is_finite(a):
138
124
return numpy .isfinite (a )
139
125
140
126
if range is not None :
127
+ if len (range ) != 2 :
128
+ raise ValueError ("range argument must consist of 2 elements." )
129
+
141
130
first_edge , last_edge = range
142
131
if first_edge > last_edge :
143
132
raise ValueError ("max must be larger than min in range parameter." )
@@ -520,6 +509,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
520
509
If `bins` is a sequence, it defines a monotonically increasing array
521
510
of bin edges, including the rightmost edge, allowing for non-uniform
522
511
bin widths.
512
+
523
513
Default: ``10``.
524
514
range : {None, 2-tuple of float}, optional
525
515
The lower and upper range of the bins. If not provided, range is simply
@@ -528,6 +518,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
528
518
affects the automatic bin computation as well. While bin width is
529
519
computed to be optimal based on the actual data within `range`, the bin
530
520
count will fill the entire range including portions containing no data.
521
+
531
522
Default: ``None``.
532
523
density : {None, bool}, optional
533
524
If ``False`` or ``None``, the result will contain the number of samples
@@ -536,6 +527,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
536
527
the range is ``1``. Note that the sum of the histogram values will not
537
528
be equal to ``1`` unless bins of unity width are chosen; it is not
538
529
a probability *mass* function.
530
+
539
531
Default: ``None``.
540
532
weights : {None, dpnp.ndarray, usm_ndarray}, optional
541
533
An array of weights, of the same shape as `a`. Each value in `a` only
@@ -545,6 +537,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
545
537
Please note that the ``dtype`` of `weights` will also become the
546
538
``dtype`` of the returned accumulator (`hist`), so it must be large
547
539
enough to hold accumulated values as well.
540
+
548
541
Default: ``None``.
549
542
550
543
Returns
@@ -751,6 +744,167 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
751
744
return bin_edges
752
745
753
746
747
+ def histogram2d (x , y , bins = 10 , range = None , density = None , weights = None ):
748
+ """
749
+ Compute the bi-dimensional histogram of two data samples.
750
+
751
+ Parameters
752
+ ----------
753
+ x : {dpnp.ndarray, usm_ndarray} of shape (N,)
754
+ An array containing the `x` coordinates of the points to be
755
+ histogrammed.
756
+ y : {dpnp.ndarray, usm_ndarray} of shape (N,)
757
+ An array containing the `y` coordinates of the points to be
758
+ histogrammed.
759
+ bins : {int, dpnp.ndarray, usm_ndarray, [int, int], [array, array], \
760
+ [int, array], [array, int]}, optional
761
+
762
+ The bins specification:
763
+
764
+ * If int, the number of bins for the two dimensions (nx=ny=bins).
765
+ * If array, the bin edges for the two dimensions
766
+ (x_edges=y_edges=bins).
767
+ * If [int, int], the number of bins in each dimension
768
+ (nx, ny = bins).
769
+ * If [array, array], the bin edges in each dimension
770
+ (x_edges, y_edges = bins).
771
+ * A combination [int, array] or [array, int], where int
772
+ is the number of bins and array is the bin edges.
773
+
774
+ Default: ``10``.
775
+ range : {None, dpnp.ndarray, usm_ndarray} of shape (2,2), optional
776
+ The leftmost and rightmost edges of the bins along each dimension.
777
+ If ``None`` the ranges are
778
+ ``[[x.min(), x.max()], [y.min(), y.max()]]``. All values outside
779
+ of this range will be considered outliers and not tallied in the
780
+ histogram.
781
+
782
+ Default: ``None``.
783
+ density : {None, bool}, optional
784
+ If ``False`` or ``None``, the default, returns the number of
785
+ samples in each bin.
786
+ If ``True``, returns the probability *density* function at the bin,
787
+ ``bin_count / sample_count / bin_volume``.
788
+
789
+ Default: ``None``.
790
+ weights : {None, dpnp.ndarray, usm_ndarray} of shape (N,), optional
791
+ An array of values ``w_i`` weighing each sample ``(x_i, y_i)``.
792
+ Weights are normalized to ``1`` if `density` is ``True``.
793
+ If `density` is ``False``, the values of the returned histogram
794
+ are equal to the sum of the weights belonging to the samples
795
+ falling into each bin.
796
+ If ``None`` all samples are assigned a weight of ``1``.
797
+
798
+ Default: ``None``.
+
799
+ Returns
800
+ -------
801
+ H : dpnp.ndarray of shape (nx, ny)
802
+ The bi-dimensional histogram of samples `x` and `y`. Values in `x`
803
+ are histogrammed along the first dimension and values in `y` are
804
+ histogrammed along the second dimension.
805
+ xedges : dpnp.ndarray of shape (nx+1,)
806
+ The bin edges along the first dimension.
807
+ yedges : dpnp.ndarray of shape (ny+1,)
808
+ The bin edges along the second dimension.
809
+
810
+ See Also
811
+ --------
812
+ :obj:`dpnp.histogram` : 1D histogram
813
+ :obj:`dpnp.histogramdd` : Multidimensional histogram
814
+
815
+ Notes
816
+ -----
817
+ When `density` is ``True``, then the returned histogram is the sample
818
+ density, defined such that the sum over bins of the product
819
+ ``bin_value * bin_area`` is 1.
820
+
821
+ Please note that the histogram does not follow the Cartesian convention
822
+ where `x` values are on the abscissa and `y` values on the ordinate
823
+ axis. Rather, `x` is histogrammed along the first dimension of the
824
+ array (vertical), and `y` along the second dimension of the array
825
+ (horizontal). This ensures compatibility with `histogramdd`.
826
+
827
+ Examples
828
+ --------
829
+ >>> import dpnp as np
830
+ >>> x = np.random.randn(20).astype("float32")
831
+ >>> y = np.random.randn(20).astype("float32")
832
+ >>> hist, edges_x, edges_y = np.histogram2d(x, y, bins=(4, 3))
833
+ >>> hist.shape
834
+ (4, 3)
835
+ >>> hist
836
+ array([[1., 2., 0.],
837
+ [0., 3., 1.],
838
+ [1., 4., 1.],
839
+ [1., 3., 3.]], dtype=float32)
840
+ >>> edges_x.shape
841
+ (5,)
842
+ >>> edges_x
843
+ array([-1.7516936 , -0.96109843, -0.17050326, 0.62009203, 1.4106871 ],
844
+ dtype=float32)
845
+ >>> edges_y.shape
846
+ (4,)
847
+ >>> edges_y
848
+ array([-2.6604428 , -0.94615364, 0.76813555, 2.4824247 ], dtype=float32)
849
+
850
+ Please note that the resulting values of the histogram and edges may vary.
851
+
852
+ """
853
+
854
+ dpnp .check_supported_arrays_type (x , y )
855
+ if weights is not None :
856
+ dpnp .check_supported_arrays_type (weights )
857
+
858
+ if x .ndim != 1 or y .ndim != 1 :
859
+ raise ValueError (
860
+ f"x and y must be 1-dimensional arrays."
861
+ f"Got { x .ndim } and { y .ndim } respectively"
862
+ )
863
+
864
+ if len (x ) != len (y ):
865
+ raise ValueError (
866
+ f"x and y must have the same length."
867
+ f"Got { len (x )} and { len (y )} respectively"
868
+ )
869
+
870
+ usm_type , exec_q = get_usm_allocations ([x , y , bins , range , weights ])
871
+ device = exec_q .sycl_device
872
+
873
+ sample_dtype = result_type_for_device ([x .dtype , y .dtype ], device )
874
+
875
+ # Unlike histogramdd, histogram2d accepts a single 1-D array of bin
876
+ # edges and applies it to both dimensions.
877
+ # At the same time, a two-element `bins` is interpreted as a separate
878
+ # specification for each dimension, and array-like `bins` with a single
879
+ # element is not allowed.
880
+ if isinstance (bins , Iterable ) and len (bins ) > 2 :
881
+ bins = [bins ] * 2
882
+
883
+ bins = _histdd_normalize_bins (bins , 2 )
884
+ bins_dtypes = [sample_dtype ]
885
+ bins_dtypes += [b .dtype for b in bins if hasattr (b , "dtype" )]
886
+
887
+ bins_dtype = result_type_for_device (bins_dtypes , device )
888
+ hist_dtype = _histdd_hist_dtype (exec_q , weights )
889
+
890
+ supported_types = statistics_ext .histogramdd_dtypes ()
891
+
892
+ sample_dtype , _ = _align_dtypes (
893
+ sample_dtype , bins_dtype , hist_dtype , supported_types , device
894
+ )
895
+
896
+ sample = dpnp .empty_like (
897
+ x , shape = x .shape + (2 ,), dtype = sample_dtype , usm_type = usm_type
898
+ )
899
+ sample [:, 0 ] = x
900
+ sample [:, 1 ] = y
901
+
902
+ hist , edges = histogramdd (
903
+ sample , bins = bins , range = range , density = density , weights = weights
904
+ )
905
+ return hist , edges [0 ], edges [1 ]
906
+
907
+
754
908
def _histdd_validate_bins (bins ):
755
909
for i , b in enumerate (bins ):
756
910
if numpy .ndim (b ) == 0 :
@@ -873,9 +1027,7 @@ def _histdd_hist_dtype(queue, weights):
873
1027
# hist_dtype is either float or complex, so it is ok
874
1028
# to calculate it as result type between default_float and
875
1029
# weights.dtype
876
- hist_dtype = _result_type_for_device (
877
- [hist_dtype , weights .dtype ], device
878
- )
1030
+ hist_dtype = result_type_for_device ([hist_dtype , weights .dtype ], device )
879
1031
880
1032
return hist_dtype
881
1033
@@ -886,7 +1038,7 @@ def _histdd_sample_dtype(queue, sample, bin_edges_list):
886
1038
dtypes_ = [bin_edges .dtype for bin_edges in bin_edges_list ]
887
1039
dtypes_ .append (sample .dtype )
888
1040
889
- return _result_type_for_device (dtypes_ , device )
1041
+ return result_type_for_device (dtypes_ , device )
890
1042
891
1043
892
1044
def _histdd_supported_dtypes (sample , bin_edges_list , weights ):
@@ -918,7 +1070,7 @@ def _histdd_extract_arrays(sample, weights, bins):
918
1070
return all_arrays
919
1071
920
1072
921
- def histogramdd (sample , bins = 10 , range = None , weights = None , density = False ):
1073
+ def histogramdd (sample , bins = 10 , range = None , density = None , weights = None ):
922
1074
"""
923
1075
Compute the multidimensional histogram of some data.
924
1076
@@ -936,30 +1088,33 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
936
1088
* The number of bins for each dimension (nx, ny, ... =bins)
937
1089
* The number of bins for all dimensions (nx=ny=...=bins).
938
1090
939
- Default: ``10``
1091
+ Default: ``10``.
940
1092
range : {None, sequence}, optional
941
1093
A sequence of length D, each an optional (lower, upper) tuple giving
942
1094
the outer bin edges to be used if the edges are not given explicitly in
943
1095
`bins`.
944
- An entry of None in the sequence results in the minimum and maximum
1096
+ An entry of ``None`` in the sequence results in the minimum and maximum
945
1097
values being used for the corresponding dimension.
946
- None is equivalent to passing a tuple of D None values.
947
-
948
- Default: ``None``
949
- weights : {dpnp.ndarray, usm_ndarray}, optional
950
- An (N,)-shaped array of values `w_i` weighing each sample
951
- `(x_i, y_i, z_i, ...)`.
952
- Weights are normalized to 1 if density is True. If density is False,
953
- the values of the returned histogram are equal to the sum of the
954
- weights belonging to the samples falling into each bin.
1098
+ ``None`` is equivalent to passing a tuple of D ``None`` values.
955
1099
956
- Default: ``None``
957
- density : bool, optional
958
- If ``False``, the default, returns the number of samples in each bin.
1100
+ Default: ``None``.
1101
+ density : {None, bool}, optional
1102
+ If ``False`` or ``None``, the default, returns the number of
1103
+ samples in each bin.
959
1104
If ``True``, returns the probability *density* function at the bin,
960
1105
``bin_count / sample_count / bin_volume``.
961
1106
962
- Default: ``False``
1107
+ Default: ``None``.
1108
+ weights : {None, dpnp.ndarray, usm_ndarray}, optional
1109
+ An (N,)-shaped array of values `w_i` weighing each sample
1110
+ `(x_i, y_i, z_i, ...)`.
1111
+ Weights are normalized to ``1`` if `density` is ``True``.
1112
+ If `density` is ``False``, the values of the returned histogram
1113
+ are equal to the sum of the weights belonging to the samples
1114
+ falling into each bin.
1115
+ If ``None`` all samples are assigned a weight of ``1``.
1116
+
1117
+ Default: ``None``.
963
1118
964
1119
Returns
965
1120
-------
@@ -993,7 +1148,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False):
993
1148
elif sample .ndim > 2 :
994
1149
raise ValueError ("sample must have no more than 2 dimensions" )
995
1150
996
- ndim = sample .shape [1 ] if sample . size > 0 else 1
1151
+ ndim = sample .shape [1 ]
997
1152
998
1153
_arrays = _histdd_extract_arrays (sample , weights , bins )
999
1154
usm_type , queue = get_usm_allocations (_arrays )
0 commit comments