Skip to content

Commit 344d53f

Browse files
committed
WIP: fixed main issue but tests still failing
1 parent 59d031f commit 344d53f

File tree

5 files changed

+43
-21
lines changed

5 files changed

+43
-21
lines changed

pandas/_libs/groupby.pyx

Lines changed: 38 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ def group_add(add_t[:, ::1] out,
517517
N, K = (<object>values).shape
518518

519519
if add_t is object:
520+
print('IN GROUPADD: Val')
520521
# NB: this does not use 'compensation' like the non-object track does.
521522
for i in range(N):
522523
lab = labels[i]
@@ -546,30 +547,46 @@ def group_add(add_t[:, ::1] out,
546547
else:
547548
out[i, j] = sumx[i, j]
548549
else:
549-
with nogil:
550-
for i in range(N):
551-
lab = labels[i]
552-
if lab < 0:
553-
continue
550+
# print('IN GROUPADD without gil: Val. THIS IS N ' + str(N))
551+
for i in range(N):
552+
lab = labels[i]
553+
# print('IN GROUPADD without gil: lab ' + str(lab) + ' WITH MINCOUNT ' + str(min_count))
554+
if lab < 0:
555+
continue
554556

555-
counts[lab] += 1
556-
for j in range(K):
557-
val = values[i, j]
557+
counts[lab] += 1
558+
for j in range(K):
559+
val = values[i, j]
558560

559-
# not nan
560-
if val == val:
561-
nobs[lab, j] += 1
562-
y = val - compensation[lab, j]
563-
t = sumx[lab, j] + y
564-
compensation[lab, j] = t - sumx[lab, j] - y
565-
sumx[lab, j] = t
561+
# not nan
562+
# print('this is val ' + str(val))
563+
# print('this is val == val ' + str(val == val))
564+
if np.isinf(val):
565+
# print('val is INF')
566+
sumx[lab, j] = val
567+
break
568+
elif val == val:
569+
nobs[lab, j] += 1
570+
# print('before adding val ' + str(val))
571+
y = val - compensation[lab, j]
572+
t = sumx[lab, j] + y
573+
compensation[lab, j] = t - sumx[lab, j] - y
574+
sumx[lab, j] = t
575+
# print('after adding val ' + str(sumx[lab, j]))
576+
# val is nan
577+
else:
578+
sumx[lab, j] = val
566579

567-
for i in range(ncounts):
568-
for j in range(K):
569-
if nobs[i, j] < min_count:
570-
out[i, j] = NAN
571-
else:
572-
out[i, j] = sumx[i, j]
580+
# print('ncounts ' + str(ncounts))
581+
for i in range(ncounts):
582+
for j in range(K):
583+
if nobs[i, j] < min_count:
584+
# print('CATCH CASE WHERE nan is given, MIN_COUNT: ' + str(min_count) + ' nobs[i, j] ' + str(nobs[i, j]))
585+
out[i, j] = NAN
586+
else:
587+
# print('THIS IS OUT in ELSE CASE ' + str(sumx[i, j]))
588+
out[i, j] = sumx[i, j]
589+
# print('THIS IS OUT ' + str(out[i, j]))
573590

574591

575592
@cython.wraparound(False)

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,6 +1490,7 @@ def _agg_general(
14901490

14911491
with self._group_selection_context():
14921492
# try a cython aggregation if we can
1493+
# #import pdb; pdb.set_trace()
14931494
result = self._cython_agg_general(
14941495
how=alias,
14951496
alt=npfunc,

pandas/core/groupby/ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ def _call_cython_op(
514514
result_mask = result_mask.T
515515

516516
out_shape = self._get_output_shape(ngroups, values)
517+
#import pdb; pdb.set_trace()
517518
func, values = self.get_cython_func_and_vals(values, is_numeric)
518519
out_dtype = self.get_out_dtype(values.dtype)
519520

@@ -925,6 +926,7 @@ def _cython_operation(
925926

926927
ids, _, _ = self.group_info
927928
ngroups = self.ngroups
929+
#import pdb; pdb.set_trace()
928930
return cy_op.cython_operation(
929931
values=values,
930932
axis=axis,

pandas/core/internals/managers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
12591259
dropped_any = False
12601260

12611261
for blk in self.blocks:
1262+
#import pdb; pdb.set_trace()
12621263
if blk.is_object:
12631264
# split on object-dtype blocks bc some columns may raise
12641265
# while others do not.

pandas/tests/groupby/test_function.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def test_intercept_builtin_sum():
5858
result = grouped.agg(builtins.sum)
5959
result2 = grouped.apply(builtins.sum)
6060
expected = grouped.sum()
61+
import pdb; pdb.set_trace()
6162
tm.assert_series_equal(result, expected)
6263
tm.assert_series_equal(result2, expected)
6364

0 commit comments

Comments
 (0)