Skip to content

Commit 613e365

Browse files
Merge remote-tracking branch 'upstream/main' into td-construction
2 parents f7ee822 + 6f33c46 commit 613e365

File tree

8 files changed

+126
-112
lines changed

8 files changed

+126
-112
lines changed

.github/workflows/windows.yml renamed to .github/workflows/macos-windows.yml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Windows
1+
name: Windows-MacOS
22

33
on:
44
push:
@@ -21,18 +21,20 @@ env:
2121

2222
jobs:
2323
pytest:
24-
runs-on: windows-latest
2524
defaults:
2625
run:
2726
shell: bash -el {0}
2827
timeout-minutes: 90
2928
strategy:
3029
matrix:
30+
os: [macos-latest, windows-latest]
3131
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
3232
fail-fast: false
33+
runs-on: ${{ matrix.os }}
34+
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}
3335
concurrency:
3436
# https://github.community/t/concurrecy-not-work-for-push/183068/7
35-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-windows
37+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.os }}
3638
cancel-in-progress: true
3739

3840
steps:
@@ -47,10 +49,17 @@ jobs:
4749
mamba-version: "*"
4850
channels: conda-forge
4951
activate-environment: pandas-dev
50-
channel-priority: strict
52+
channel-priority: ${{ matrix.os == 'macos-latest' && 'flexible' || 'strict' }}
5153
environment-file: ci/deps/${{ matrix.env_file }}
5254
use-only-tar-bz2: true
5355

56+
# ImportError: 2): Library not loaded: @rpath/libssl.1.1.dylib
57+
# Referenced from: /Users/runner/miniconda3/envs/pandas-dev/lib/libthrift.0.13.0.dylib
58+
# Reason: image not found
59+
- name: Upgrade pyarrow on MacOS
60+
run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=6
61+
if: ${{ matrix.os == 'macos-latest' }}
62+
5463
- name: Build Pandas
5564
uses: ./.github/actions/build_pandas
5665

azure-pipelines.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,6 @@ variables:
2222
PANDAS_CI: 1
2323

2424
jobs:
25-
- template: ci/azure/posix.yml
26-
parameters:
27-
name: macOS
28-
vmImage: macOS-10.15
29-
3025
- job: py38_32bit
3126
pool:
3227
vmImage: ubuntu-18.04

ci/azure/posix.yml

Lines changed: 0 additions & 50 deletions
This file was deleted.

ci/setup_env.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,6 @@ mamba install -n pandas-dev 'setuptools<60'
7373
echo "conda list -n pandas-dev"
7474
conda list -n pandas-dev
7575

76-
# From pyarrow on MacOS
77-
# ImportError: 2): Library not loaded: @rpath/libssl.1.1.dylib
78-
# Referenced from: /Users/runner/miniconda3/envs/pandas-dev/lib/libthrift.0.13.0.dylib
79-
# Reason: image not found
80-
if [[ "$(uname)" == 'Darwin' ]]; then
81-
echo "Update pyarrow for pyarrow on MacOS"
82-
conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=6
83-
fi
84-
8576
if [[ "$BITS32" == "yes" ]]; then
8677
# activate 32-bit compiler
8778
export CONDA_BUILD=1

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,7 @@ Groupby/resample/rolling
732732
- Bug in :meth:`SeriesGroupBy.apply` would incorrectly name its result when there was a unique group (:issue:`46369`)
733733
- Bug in :meth:`Rolling.sum` and :meth:`Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`)
734734
- Bug in :meth:`Rolling.var` and :meth:`Rolling.std` would give non-zero result with window of same values (:issue:`42064`)
735+
- Bug in :meth:`Rolling.skew` and :meth:`Rolling.kurt` would give NaN with window of same values (:issue:`30993`)
735736
- Bug in :meth:`.Rolling.var` would segfault calculating weighted variance when window size was larger than data size (:issue:`46760`)
736737
- Bug in :meth:`Grouper.__repr__` where ``dropna`` was not included. Now it is (:issue:`46754`)
737738
- Bug in :meth:`DataFrame.rolling` gives ValueError when center=True, axis=1 and win_type is specified (:issue:`46135`)

pandas/_libs/window/aggregations.pyx

Lines changed: 86 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,9 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
455455

456456

457457
cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
458-
float64_t x, float64_t xx,
459-
float64_t xxx) nogil:
458+
float64_t x, float64_t xx, float64_t xxx,
459+
int64_t num_consecutive_same_value
460+
) nogil:
460461
cdef:
461462
float64_t result, dnobs
462463
float64_t A, B, C, R
@@ -467,6 +468,12 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
467468
B = xx / dnobs - A * A
468469
C = xxx / dnobs - A * A * A - 3 * A * B
469470

471+
if nobs < 3:
472+
result = NaN
473+
# GH 42064 46431
474+
# uniform case, force result to be 0
475+
elif num_consecutive_same_value >= nobs:
476+
result = 0.0
470477
# #18044: with uniform distribution, floating issue will
471478
# cause B != 0. and cause the result is a very
472479
# large number.
@@ -476,7 +483,7 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs,
476483
# if the variance is less than 1e-14, it could be
477484
# treat as zero, here we follow the original
478485
# skew/kurt behaviour to check B <= 1e-14
479-
if B <= 1e-14 or nobs < 3:
486+
elif B <= 1e-14:
480487
result = NaN
481488
else:
482489
R = sqrt(B)
@@ -493,7 +500,10 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
493500
float64_t *xxx,
494501
float64_t *compensation_x,
495502
float64_t *compensation_xx,
496-
float64_t *compensation_xxx) nogil:
503+
float64_t *compensation_xxx,
504+
int64_t *num_consecutive_same_value,
505+
float64_t *prev_value,
506+
) nogil:
497507
""" add a value from the skew calc """
498508
cdef:
499509
float64_t y, t
@@ -515,6 +525,14 @@ cdef inline void add_skew(float64_t val, int64_t *nobs,
515525
compensation_xxx[0] = t - xxx[0] - y
516526
xxx[0] = t
517527

528+
# GH#42064, record num of same values to remove floating point artifacts
529+
if val == prev_value[0]:
530+
num_consecutive_same_value[0] += 1
531+
else:
532+
# reset to 1 (include current value itself)
533+
num_consecutive_same_value[0] = 1
534+
prev_value[0] = val
535+
518536

519537
cdef inline void remove_skew(float64_t val, int64_t *nobs,
520538
float64_t *x, float64_t *xx,
@@ -553,8 +571,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
553571
float64_t compensation_xx_add, compensation_xx_remove
554572
float64_t compensation_x_add, compensation_x_remove
555573
float64_t x, xx, xxx
574+
float64_t prev_value
556575
int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0
557-
int64_t s, e
576+
int64_t s, e, num_consecutive_same_value
558577
ndarray[float64_t] output, mean_array, values_copy
559578
bint is_monotonic_increasing_bounds
560579

@@ -588,6 +607,9 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
588607
# never removed
589608
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
590609

610+
prev_value = values[s]
611+
num_consecutive_same_value = 0
612+
591613
compensation_xxx_add = compensation_xxx_remove = 0
592614
compensation_xx_add = compensation_xx_remove = 0
593615
compensation_x_add = compensation_x_remove = 0
@@ -596,7 +618,8 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
596618
for j in range(s, e):
597619
val = values_copy[j]
598620
add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
599-
&compensation_xx_add, &compensation_xxx_add)
621+
&compensation_xx_add, &compensation_xxx_add,
622+
&num_consecutive_same_value, &prev_value)
600623

601624
else:
602625

@@ -612,9 +635,10 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
612635
for j in range(end[i - 1], e):
613636
val = values_copy[j]
614637
add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
615-
&compensation_xx_add, &compensation_xxx_add)
638+
&compensation_xx_add, &compensation_xxx_add,
639+
&num_consecutive_same_value, &prev_value)
616640

617-
output[i] = calc_skew(minp, nobs, x, xx, xxx)
641+
output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value)
618642

619643
if not is_monotonic_increasing_bounds:
620644
nobs = 0
@@ -630,35 +654,44 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
630654

631655
cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs,
632656
float64_t x, float64_t xx,
633-
float64_t xxx, float64_t xxxx) nogil:
657+
float64_t xxx, float64_t xxxx,
658+
int64_t num_consecutive_same_value,
659+
) nogil:
634660
cdef:
635661
float64_t result, dnobs
636662
float64_t A, B, C, D, R, K
637663

638664
if nobs >= minp:
639-
dnobs = <float64_t>nobs
640-
A = x / dnobs
641-
R = A * A
642-
B = xx / dnobs - R
643-
R = R * A
644-
C = xxx / dnobs - R - 3 * A * B
645-
R = R * A
646-
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
647-
648-
# #18044: with uniform distribution, floating issue will
649-
# cause B != 0. and cause the result is a very
650-
# large number.
651-
#
652-
# in core/nanops.py nanskew/nankurt call the function
653-
# _zero_out_fperr(m2) to fix floating error.
654-
# if the variance is less than 1e-14, it could be
655-
# treat as zero, here we follow the original
656-
# skew/kurt behaviour to check B <= 1e-14
657-
if B <= 1e-14 or nobs < 4:
665+
if nobs < 4:
658666
result = NaN
667+
# GH 42064 46431
668+
# uniform case, force result to be -3.
669+
elif num_consecutive_same_value >= nobs:
670+
result = -3.
659671
else:
660-
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
661-
result = K / ((dnobs - 2.) * (dnobs - 3.))
672+
dnobs = <float64_t>nobs
673+
A = x / dnobs
674+
R = A * A
675+
B = xx / dnobs - R
676+
R = R * A
677+
C = xxx / dnobs - R - 3 * A * B
678+
R = R * A
679+
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A
680+
681+
# #18044: with uniform distribution, floating issue will
682+
# cause B != 0. and cause the result is a very
683+
# large number.
684+
#
685+
# in core/nanops.py nanskew/nankurt call the function
686+
# _zero_out_fperr(m2) to fix floating error.
687+
# if the variance is less than 1e-14, it could be
688+
# treat as zero, here we follow the original
689+
# skew/kurt behaviour to check B <= 1e-14
690+
if B <= 1e-14:
691+
result = NaN
692+
else:
693+
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
694+
result = K / ((dnobs - 2.) * (dnobs - 3.))
662695
else:
663696
result = NaN
664697

@@ -671,7 +704,10 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
671704
float64_t *compensation_x,
672705
float64_t *compensation_xx,
673706
float64_t *compensation_xxx,
674-
float64_t *compensation_xxxx) nogil:
707+
float64_t *compensation_xxxx,
708+
int64_t *num_consecutive_same_value,
709+
float64_t *prev_value
710+
) nogil:
675711
""" add a value from the kurotic calc """
676712
cdef:
677713
float64_t y, t
@@ -697,6 +733,14 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs,
697733
compensation_xxxx[0] = t - xxxx[0] - y
698734
xxxx[0] = t
699735

736+
# GH#42064, record num of same values to remove floating point artifacts
737+
if val == prev_value[0]:
738+
num_consecutive_same_value[0] += 1
739+
else:
740+
# reset to 1 (include current value itself)
741+
num_consecutive_same_value[0] = 1
742+
prev_value[0] = val
743+
700744

701745
cdef inline void remove_kurt(float64_t val, int64_t *nobs,
702746
float64_t *x, float64_t *xx,
@@ -741,7 +785,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
741785
float64_t compensation_xx_remove, compensation_xx_add
742786
float64_t compensation_x_remove, compensation_x_add
743787
float64_t x, xx, xxx, xxxx
744-
int64_t nobs, s, e, N = len(start), V = len(values), nobs_mean = 0
788+
float64_t prev_value
789+
int64_t nobs, s, e, num_consecutive_same_value
790+
int64_t N = len(start), V = len(values), nobs_mean = 0
745791
ndarray[float64_t] output, values_copy
746792
bint is_monotonic_increasing_bounds
747793

@@ -775,6 +821,9 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
775821
# never removed
776822
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
777823

824+
prev_value = values[s]
825+
num_consecutive_same_value = 0
826+
778827
compensation_xxxx_add = compensation_xxxx_remove = 0
779828
compensation_xxx_remove = compensation_xxx_add = 0
780829
compensation_xx_remove = compensation_xx_add = 0
@@ -784,7 +833,8 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
784833
for j in range(s, e):
785834
add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
786835
&compensation_x_add, &compensation_xx_add,
787-
&compensation_xxx_add, &compensation_xxxx_add)
836+
&compensation_xxx_add, &compensation_xxxx_add,
837+
&num_consecutive_same_value, &prev_value)
788838

789839
else:
790840

@@ -800,9 +850,10 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
800850
for j in range(end[i - 1], e):
801851
add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
802852
&compensation_x_add, &compensation_xx_add,
803-
&compensation_xxx_add, &compensation_xxxx_add)
853+
&compensation_xxx_add, &compensation_xxxx_add,
854+
&num_consecutive_same_value, &prev_value)
804855

805-
output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
856+
output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx, num_consecutive_same_value)
806857

807858
if not is_monotonic_increasing_bounds:
808859
nobs = 0

pandas/tests/window/test_rolling.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,3 +1860,14 @@ def test_rolling_mean_sum_floating_artifacts():
18601860
assert (result[-3:] == 0).all()
18611861
result = r.sum()
18621862
assert (result[-3:] == 0).all()
1863+
1864+
1865+
def test_rolling_skew_kurt_floating_artifacts():
1866+
# GH 42064 46431
1867+
1868+
sr = Series([1 / 3, 4, 0, 0, 0, 0, 0])
1869+
r = sr.rolling(4)
1870+
result = r.skew()
1871+
assert (result[-2:] == 0).all()
1872+
result = r.kurt()
1873+
assert (result[-2:] == -3).all()

0 commit comments

Comments
 (0)