37
37
38
38
"""
39
39
40
+ # pylint: disable=duplicate-code
41
+
40
42
import warnings
41
43
42
44
import dpnp
@@ -955,7 +957,15 @@ def nansum(
955
957
956
958
957
959
def nanstd (
958
- a , axis = None , dtype = None , out = None , ddof = 0 , keepdims = False , * , where = True
960
+ a ,
961
+ axis = None ,
962
+ dtype = None ,
963
+ out = None ,
964
+ ddof = 0 ,
965
+ keepdims = False ,
966
+ * ,
967
+ where = True ,
968
+ mean = None ,
959
969
):
960
970
"""
961
971
Compute the standard deviation along the specified axis,
@@ -969,40 +979,52 @@ def nanstd(
969
979
Input array.
970
980
axis : {None, int, tuple of ints}, optional
971
981
Axis or axes along which the standard deviations must be computed.
972
- If a tuple of unique integers is given, the standard deviations
973
- are computed over multiple axes. If ``None``, the standard deviation
974
- is computed over the entire array.
982
+ If a tuple of unique integers is given, the standard deviations are
983
+ computed over multiple axes. If ``None``, the standard deviation is
984
+ computed over the entire array.
985
+
975
986
Default: ``None``.
976
987
dtype : {None, dtype}, optional
977
- Type to use in computing the standard deviation. By default,
978
- if `a` has a floating-point data type, the returned array
979
- will have the same data type as `a`.
980
- If `a` has a boolean or integral data type, the returned array
981
- will have the default floating point data type for the device
988
+ Type to use in computing the standard deviation. By default, if `a` has
989
+ a floating-point data type, the returned array will have the same data
990
+ type as `a`. If `a` has a boolean or integral data type, the returned
991
+ array will have the default floating point data type for the device
982
992
where input array `a` is allocated.
993
+
994
+ Default: ``None``.
983
995
out : {None, dpnp.ndarray, usm_ndarray}, optional
984
996
Alternative output array in which to place the result. It must have
985
997
the same shape as the expected output but the type (of the calculated
986
998
values) will be cast if necessary.
999
+
1000
+ Default: ``None``.
987
1001
ddof : {int, float}, optional
988
- Means Delta Degrees of Freedom. The divisor used in calculations
989
- is ``N - ddof``, where ``N`` the number of non-NaN elements.
990
- Default: `0.0`.
1002
+ Means Delta Degrees of Freedom. The divisor used in calculations is
1003
+ ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1004
+
1005
+ Default: ``0.0``.
991
1006
keepdims : {None, bool}, optional
992
1007
If ``True``, the reduced axes (dimensions) are included in the result
993
- as singleton dimensions, so that the returned array remains
994
- compatible with the input array according to Array Broadcasting
995
- rules. Otherwise, if ``False``, the reduced axes are not included in
996
- the returned array. Default: ``False``.
1008
+ as singleton dimensions, so that the returned array remains compatible
1009
+ with the input array according to Array Broadcasting rules. Otherwise,
1010
+ if ``False``, the reduced axes are not included in the returned array.
1011
+
1012
+ Default: ``False``.
1013
+ mean : {dpnp.ndarray, usm_ndarray}, optional
1014
+ Provide the mean to prevent its recalculation. The mean should have
1015
+ a shape as if it was calculated with ``keepdims=True``.
1016
+ The axis for the calculation of the mean should be the same as used in
1017
+ the call to this `nanstd` function.
1018
+
1019
+ Default: ``None``.
997
1020
998
1021
Returns
999
1022
-------
1000
1023
out : dpnp.ndarray
1001
- An array containing the standard deviations. If the standard
1002
- deviation was computed over the entire array, a zero-dimensional
1003
- array is returned. If `ddof` is >= the number of non-NaN elements
1004
- in a slice or the slice contains only NaNs, then the result for
1005
- that slice is NaN.
1024
+ An array containing the standard deviations. If the standard deviation
1025
+ was computed over the entire array, a zero-dimensional array is
1026
+ returned. If `ddof` is >= the number of non-NaN elements in a slice or
1027
+ the slice contains only NaNs, then the result for that slice is NaN.
1006
1028
1007
1029
Limitations
1008
1030
-----------
@@ -1011,6 +1033,19 @@ def nanstd(
1011
1033
1012
1034
Notes
1013
1035
-----
1036
+ The standard deviation is the square root of the average of the squared
1037
+ deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.
1038
+
1039
+ The average squared deviation is normally calculated as ``x.sum() / N``,
1040
+ where ``N = len(x)``. If, however, `ddof` is specified, the divisor
1041
+ ``N - ddof`` is used instead. In standard statistical practice, ``ddof=1``
1042
+ provides an unbiased estimator of the variance of the infinite population.
1043
+ ``ddof=0`` provides a maximum likelihood estimate of the variance for
1044
+ normally distributed variables.
1045
+ The standard deviation computed in this function is the square root of
1046
+ the estimated variance, so even with ``ddof=1``, it will not be an unbiased
1047
+ estimate of the standard deviation per se.
1048
+
1014
1049
Note that, for complex numbers, the absolute value is taken before
1015
1050
squaring, so that the result is always real and non-negative.
1016
1051
@@ -1029,11 +1064,18 @@ def nanstd(
1029
1064
>>> import dpnp as np
1030
1065
>>> a = np.array([[1, np.nan], [3, 4]])
1031
1066
>>> np.nanstd(a)
1032
- array(1.247219128924647 )
1067
+ array(1.24721913 )
1033
1068
>>> np.nanstd(a, axis=0)
1034
- array([1., 0.])
1069
+ array([1., 0.])
1035
1070
>>> np.nanstd(a, axis=1)
1036
- array([0., 0.5]) # may vary
1071
+ array([0. , 0.5]) # may vary
1072
+
1073
+ Using the mean keyword to save computation time:
1074
+
1075
+ >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1076
+ >>> mean = np.nanmean(a, axis=1, keepdims=True)
1077
+ >>> np.nanstd(a, axis=1, mean=mean)
1078
+ array([2.49443826, 1.47901995, 4.0824829 ])
1037
1079
1038
1080
"""
1039
1081
@@ -1051,13 +1093,21 @@ def nanstd(
1051
1093
ddof = ddof ,
1052
1094
keepdims = keepdims ,
1053
1095
where = where ,
1096
+ mean = mean ,
1054
1097
)
1055
- dpnp .sqrt (res , out = res )
1056
- return res
1098
+ return dpnp .sqrt (res , out = res )
1057
1099
1058
1100
1059
1101
def nanvar (
1060
- a , axis = None , dtype = None , out = None , ddof = 0 , keepdims = False , * , where = True
1102
+ a ,
1103
+ axis = None ,
1104
+ dtype = None ,
1105
+ out = None ,
1106
+ ddof = 0 ,
1107
+ keepdims = False ,
1108
+ * ,
1109
+ where = True ,
1110
+ mean = None ,
1061
1111
):
1062
1112
"""
1063
1113
Compute the variance along the specified axis, while ignoring NaNs.
@@ -1069,39 +1119,52 @@ def nanvar(
1069
1119
a : {dpnp.ndarray, usm_ndarray}
1070
1120
Input array.
1071
1121
axis : {None, int, tuple of ints}, optional
1072
- axis or axes along which the variances must be computed. If a tuple
1122
+ Axis or axes along which the variances must be computed. If a tuple
1073
1123
of unique integers is given, the variances are computed over multiple
1074
1124
axes. If ``None``, the variance is computed over the entire array.
1125
+
1075
1126
Default: ``None``.
1076
1127
dtype : {None, dtype}, optional
1077
1128
Type to use in computing the variance. By default, if `a` has a
1078
1129
floating-point data type, the returned array will have
1079
- the same data type as `a`.
1080
- If `a` has a boolean or integral data type, the returned array
1081
- will have the default floating point data type for the device
1082
- where input array `a` is allocated.
1130
+ the same data type as `a`. If `a` has a boolean or integral data type,
1131
+ the returned array will have the default floating point data type for
1132
+ the device where input array `a` is allocated.
1133
+
1134
+ Default: ``None``.
1083
1135
out : {None, dpnp.ndarray, usm_ndarray}, optional
1084
1136
Alternative output array in which to place the result. It must have
1085
1137
the same shape as the expected output but the type (of the calculated
1086
1138
values) will be cast if necessary.
1139
+
1140
+ Default: ``None``.
1087
1141
ddof : {int, float}, optional
1088
- Means Delta Degrees of Freedom. The divisor used in calculations
1089
- is ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1090
- Default: `0.0`.
1142
+ Means Delta Degrees of Freedom. The divisor used in calculations is
1143
+ ``N - ddof``, where ``N`` represents the number of non-NaN elements.
1144
+
1145
+ Default: ``0.0``.
1091
1146
keepdims : {None, bool}, optional
1092
1147
If ``True``, the reduced axes (dimensions) are included in the result
1093
- as singleton dimensions, so that the returned array remains
1094
- compatible with the input array according to Array Broadcasting
1095
- rules. Otherwise, if ``False``, the reduced axes are not included in
1096
- the returned array. Default: ``False``.
1148
+ as singleton dimensions, so that the returned array remains compatible
1149
+ with the input array according to Array Broadcasting rules. Otherwise,
1150
+ if ``False``, the reduced axes are not included in the returned array.
1151
+
1152
+ Default: ``False``.
1153
+ mean : {dpnp.ndarray, usm_ndarray}, optional
1154
+ Provide the mean to prevent its recalculation. The mean should have
1155
+ a shape as if it was calculated with ``keepdims=True``.
1156
+ The axis for the calculation of the mean should be the same as used in
1157
+ the call to this `nanvar` function.
1158
+
1159
+ Default: ``None``.
1097
1160
1098
1161
Returns
1099
1162
-------
1100
1163
out : dpnp.ndarray
1101
- An array containing the variances. If the variance was computed
1102
- over the entire array, a zero-dimensional array is returned.
1103
- If `ddof` is >= the number of non-NaN elements in a slice or the
1104
- slice contains only NaNs, then the result for that slice is NaN.
1164
+ An array containing the variances. If the variance was computed over
1165
+ the entire array, a zero-dimensional array is returned. If `ddof` is >=
1166
+ the number of non-NaN elements in a slice or the slice contains only
1167
+ NaNs, then the result for that slice is NaN.
1105
1168
1106
1169
Limitations
1107
1170
-----------
@@ -1110,6 +1173,16 @@ def nanvar(
1110
1173
1111
1174
Notes
1112
1175
-----
1176
+ The variance is the average of the squared deviations from the mean,
1177
+ that is ``var = mean(abs(x - x.mean())**2)``.
1178
+
1179
+ The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
1180
+ If, however, `ddof` is specified, the divisor ``N - ddof`` is used instead.
1181
+ In standard statistical practice, ``ddof=1`` provides an unbiased estimator
1182
+ of the variance of a hypothetical infinite population. ``ddof=0`` provides
1183
+ a maximum likelihood estimate of the variance for normally distributed
1184
+ variables.
1185
+
1113
1186
Note that, for complex numbers, the absolute value is taken before squaring,
1114
1187
so that the result is always real and non-negative.
1115
1188
@@ -1127,11 +1200,18 @@ def nanvar(
1127
1200
>>> import dpnp as np
1128
1201
>>> a = np.array([[1, np.nan], [3, 4]])
1129
1202
>>> np.nanvar(a)
1130
- array(1.5555555555555554 )
1203
+ array(1.55555556 )
1131
1204
>>> np.nanvar(a, axis=0)
1132
- array([1., 0.])
1205
+ array([1., 0.])
1133
1206
>>> np.nanvar(a, axis=1)
1134
- array([0., 0.25]) # may vary
1207
+ array([0. , 0.25]) # may vary
1208
+
1209
+ Using the mean keyword to save computation time:
1210
+
1211
+ >>> a = np.array([[14, 8, np.nan, 10], [7, 9, 10, 11], [np.nan, 15, 5, 10]])
1212
+ >>> mean = np.nanmean(a, axis=1, keepdims=True)
1213
+ >>> np.nanvar(a, axis=1, mean=mean)
1214
+ array([ 6.22222222, 2.1875 , 16.66666667])
1135
1215
1136
1216
"""
1137
1217
@@ -1157,46 +1237,51 @@ def nanvar(
1157
1237
dtype = dpnp .dtype (dtype )
1158
1238
if not dpnp .issubdtype (dtype , dpnp .inexact ):
1159
1239
raise TypeError ("If input is inexact, then dtype must be inexact." )
1240
+
1160
1241
if out is not None :
1161
1242
dpnp .check_supported_arrays_type (out )
1162
1243
if not dpnp .issubdtype (out .dtype , dpnp .inexact ):
1163
1244
raise TypeError ("If input is inexact, then out must be inexact." )
1164
1245
1165
1246
# Compute mean
1166
- var_dtype = a .real .dtype if dtype is None else dtype
1167
1247
cnt = dpnp .sum (
1168
- ~ mask , axis = axis , dtype = var_dtype , keepdims = True , where = where
1248
+ ~ mask , axis = axis , dtype = dpnp . intp , keepdims = True , where = where
1169
1249
)
1170
- avg = dpnp .sum (arr , axis = axis , dtype = dtype , keepdims = True , where = where )
1171
- avg = dpnp .divide (avg , cnt , out = avg )
1172
1250
1173
- # Compute squared deviation from mean.
1251
+ if mean is not None :
1252
+ avg = mean
1253
+ else :
1254
+ avg = dpnp .sum (arr , axis = axis , dtype = dtype , keepdims = True , where = where )
1255
+ avg = dpnp .divide (avg , cnt , out = avg )
1256
+
1257
+ # Compute squared deviation from mean
1174
1258
if arr .dtype == avg .dtype :
1175
1259
arr = dpnp .subtract (arr , avg , out = arr )
1176
1260
else :
1177
1261
arr = dpnp .subtract (arr , avg )
1178
1262
dpnp .copyto (arr , 0.0 , where = mask )
1263
+
1179
1264
if dpnp .issubdtype (arr .dtype , dpnp .complexfloating ):
1180
1265
sqr = dpnp .multiply (arr , arr .conj (), out = arr ).real
1181
1266
else :
1182
- sqr = dpnp .multiply ( arr , arr , out = arr )
1267
+ sqr = dpnp .square ( arr , out = arr )
1183
1268
1184
1269
# Compute variance
1185
1270
var = dpnp .sum (
1186
1271
sqr ,
1187
1272
axis = axis ,
1188
- dtype = var_dtype ,
1273
+ dtype = dtype ,
1189
1274
out = out ,
1190
1275
keepdims = keepdims ,
1191
1276
where = where ,
1192
1277
)
1193
1278
1194
1279
if var .ndim < cnt .ndim :
1195
1280
cnt = cnt .squeeze (axis )
1196
- cnt -= ddof
1197
- dpnp .divide (var , cnt , out = var )
1281
+ dof = cnt - ddof
1282
+ dpnp .divide (var , dof , out = var )
1198
1283
1199
- isbad = cnt <= 0
1284
+ isbad = dof <= 0
1200
1285
if dpnp .any (isbad ):
1201
1286
# NaN, inf, or negative numbers are all possible bad
1202
1287
# values, so explicitly replace them with NaN.
0 commit comments