Skip to content

Commit ce461d1

Browse files
committed
use nogil in aggregations and groupby
1 parent a046606 commit ce461d1

File tree

2 files changed

+83
-53
lines changed

2 files changed

+83
-53
lines changed

pandas/_libs/groupby.pyx

Lines changed: 56 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,14 @@ from pandas._libs.missing cimport checknull
5151
cdef int64_t NPY_NAT = get_nat()
5252
_int64_max = np.iinfo(np.int64).max
5353

54-
cdef float64_t NaN = <float64_t>np.NaN
54+
cdef:
55+
float32_t MINfloat32 = np.NINF
56+
float64_t MINfloat64 = np.NINF
57+
58+
float32_t MAXfloat32 = np.inf
59+
float64_t MAXfloat64 = np.inf
60+
61+
float64_t NaN = <float64_t>np.NaN
5562

5663
cdef enum InterpolationEnumType:
5764
INTERPOLATION_LINEAR,
@@ -240,39 +247,58 @@ def group_cumsum(numeric_t[:, ::1] out,
240247
accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
241248
compensation = np.zeros((ngroups, K), dtype=np.asarray(values).dtype)
242249

243-
for i in range(N):
244-
lab = labels[i]
250+
with nogil:
251+
for i in range(N):
252+
lab = labels[i]
245253

246-
if lab < 0:
247-
continue
248-
for j in range(K):
249-
val = values[i, j]
254+
if lab < 0:
255+
continue
256+
for j in range(K):
257+
val = values[i, j]
250258

251-
# For floats, use Kahan summation to reduce floating-point
252-
# error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
253-
if numeric_t == float32_t or numeric_t == float64_t:
254-
if np.isinf(val):
255-
out[i, j] = val
256-
accum[lab, j] = val
257-
break
258-
elif val == val:
259-
y = val - compensation[lab, j]
260-
t = accum[lab, j] + y
261-
compensation[lab, j] = t - accum[lab, j] - y
262-
accum[lab, j] = t
263-
out[i, j] = t
264-
if np.isinf(t):
265-
compensation[lab, j] = 0
259+
# For floats, use Kahan summation to reduce floating-point
260+
# error (https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
261+
if numeric_t == float32_t:
262+
if (val == MAXfloat32) or (val == MINfloat32):
263+
if (t == MAXfloat32) or (t == MINfloat32):
264+
val = t
265+
out[i, j] = val
266+
accum[lab, j] = val
266267
break
267-
else:
268-
out[i, j] = NaN
269-
if not skipna:
270-
accum[lab, j] = NaN
268+
elif val == val:
269+
y = val - compensation[lab, j]
270+
t = accum[lab, j] + y
271+
compensation[lab, j] = t - accum[lab, j] - y
272+
accum[lab, j] = t
273+
out[i, j] = t
274+
else:
275+
out[i, j] = NaN
276+
if not skipna:
277+
accum[lab, j] = NaN
278+
break
279+
elif numeric_t == float64_t:
280+
if (val == MAXfloat64) or (val == MINfloat64):
281+
out[i, j] = val
282+
accum[lab, j] = val
271283
break
272-
else:
273-
t = val + accum[lab, j]
274-
accum[lab, j] = t
275-
out[i, j] = t
284+
elif val == val:
285+
y = val - compensation[lab, j]
286+
t = accum[lab, j] + y
287+
compensation[lab, j] = t - accum[lab, j] - y
288+
accum[lab, j] = t
289+
out[i, j] = t
290+
if (t == MAXfloat64) or (t == MINfloat64):
291+
compensation[lab, j] = 0
292+
break
293+
else:
294+
out[i, j] = NaN
295+
if not skipna:
296+
accum[lab, j] = NaN
297+
break
298+
else:
299+
t = val + accum[lab, j]
300+
accum[lab, j] = t
301+
out[i, j] = t
276302

277303

278304
@cython.boundscheck(False)

pandas/_libs/window/aggregations.pyx

Lines changed: 27 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x) nogi
8787

8888

8989
cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
90-
float64_t *compensation):
90+
float64_t *compensation) nogil:
9191
""" add a value from the sum calc using Kahan summation """
9292

9393
cdef:
@@ -100,14 +100,14 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
100100
t = sum_x[0] + y
101101
compensation[0] = t - sum_x[0] - y
102102
sum_x[0] = t
103-
if np.isinf(val):
103+
if (val == MINfloat64) or (val == MAXfloat64):
104104
sum_x[0] = val
105105
nobs[0] = nobs[0] + 1
106106
compensation[0] = 0
107107

108108

109109
cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
110-
float64_t *compensation):
110+
float64_t *compensation) nogil:
111111
""" remove a value from the sum calc using Kahan summation """
112112

113113
cdef:
@@ -120,7 +120,7 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
120120
t = sum_x[0] + y
121121
compensation[0] = t - sum_x[0] - y
122122
sum_x[0] = t
123-
if np.isinf(val):
123+
if (val == MINfloat64) or (val == MAXfloat64):
124124
sum_x[0] = val
125125
nobs[0] = nobs[0] - 1
126126
compensation[0] = 0
@@ -141,31 +141,35 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
141141
)
142142
output = np.empty(N, dtype=np.float64)
143143

144-
for i in range(0, N):
145-
s = start[i]
146-
e = end[i]
144+
with nogil:
147145

148-
if i == 0 or not is_monotonic_increasing_bounds:
146+
for i in range(0, N):
147+
s = start[i]
148+
e = end[i]
149149

150-
# setup
151-
for j in range(s, e):
152-
add_sum(values[j], &nobs, &sum_x, &compensation_add)
150+
if i == 0 or not is_monotonic_increasing_bounds:
153151

154-
else:
155-
# calculate deletes
156-
for j in range(start[i - 1], s):
157-
remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
152+
# setup
153+
154+
for j in range(s, e):
155+
add_sum(values[j], &nobs, &sum_x, &compensation_add)
158156

159-
# calculate adds
160-
for j in range(end[i - 1], e):
161-
add_sum(values[j], &nobs, &sum_x, &compensation_add)
157+
else:
158+
159+
# calculate deletes
160+
for j in range(start[i - 1], s):
161+
remove_sum(values[j], &nobs, &sum_x, &compensation_remove)
162162

163-
output[i] = calc_sum(minp, nobs, sum_x)
163+
# calculate adds
164+
for j in range(end[i - 1], e):
165+
add_sum(values[j], &nobs, &sum_x, &compensation_add)
164166

165-
if not is_monotonic_increasing_bounds:
166-
nobs = 0
167-
sum_x = 0.0
168-
compensation_remove = 0.0
167+
output[i] = calc_sum(minp, nobs, sum_x)
168+
169+
if not is_monotonic_increasing_bounds:
170+
nobs = 0
171+
sum_x = 0.0
172+
compensation_remove = 0.0
169173

170174
return output
171175

0 commit comments

Comments
 (0)