Skip to content

Commit 66f2dc1

Browse files
committed
Revert "PERF: cythonize kendall correlation (pandas-dev#39132)"
This reverts commit 57ccd2a. The Kendall implementation failed to take ties into account and was inconsistent with SciPy's method.
1 parent 94a0eea commit 66f2dc1

File tree

2 files changed

+3
-96
lines changed

2 files changed

+3
-96
lines changed

pandas/_libs/algos.pyx

Lines changed: 0 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -515,98 +515,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
515515

516516
return result
517517

518-
519-
# ----------------------------------------------------------------------
520-
# Kendall correlation
521-
# Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
522-
523-
@cython.boundscheck(False)
524-
@cython.wraparound(False)
525-
def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray:
526-
"""
527-
Perform kendall correlation on a 2d array
528-
529-
Parameters
530-
----------
531-
mat : np.ndarray[float64_t, ndim=2]
532-
Array to compute kendall correlation on
533-
minp : int, default 1
534-
Minimum number of observations required per pair of columns
535-
to have a valid result.
536-
537-
Returns
538-
-------
539-
numpy.ndarray[float64_t, ndim=2]
540-
Correlation matrix
541-
"""
542-
cdef:
543-
Py_ssize_t i, j, k, xi, yi, N, K
544-
ndarray[float64_t, ndim=2] result
545-
ndarray[float64_t, ndim=2] ranked_mat
546-
ndarray[uint8_t, ndim=2] mask
547-
float64_t currj
548-
ndarray[uint8_t, ndim=1] valid
549-
ndarray[int64_t] sorted_idxs
550-
ndarray[float64_t, ndim=1] col
551-
int64_t n_concordant
552-
int64_t total_concordant = 0
553-
int64_t total_discordant = 0
554-
float64_t kendall_tau
555-
int64_t n_obs
556-
557-
N, K = (<object>mat).shape
558-
559-
result = np.empty((K, K), dtype=np.float64)
560-
mask = np.isfinite(mat)
561-
562-
ranked_mat = np.empty((N, K), dtype=np.float64)
563-
564-
for i in range(K):
565-
ranked_mat[:, i] = rank_1d(mat[:, i])
566-
567-
for xi in range(K):
568-
sorted_idxs = ranked_mat[:, xi].argsort()
569-
ranked_mat = ranked_mat[sorted_idxs]
570-
mask = mask[sorted_idxs]
571-
for yi in range(xi + 1, K):
572-
valid = mask[:, xi] & mask[:, yi]
573-
if valid.sum() < minp:
574-
result[xi, yi] = NaN
575-
result[yi, xi] = NaN
576-
else:
577-
# Get columns and order second column using 1st column ranks
578-
if not valid.all():
579-
col = ranked_mat[valid.nonzero()][:, yi]
580-
else:
581-
col = ranked_mat[:, yi]
582-
n_obs = col.shape[0]
583-
total_concordant = 0
584-
total_discordant = 0
585-
for j in range(n_obs - 1):
586-
currj = col[j]
587-
# Count num concordant and discordant pairs
588-
n_concordant = 0
589-
for k in range(j, n_obs):
590-
if col[k] > currj:
591-
n_concordant += 1
592-
total_concordant += n_concordant
593-
total_discordant += (n_obs - 1 - j - n_concordant)
594-
# Note: we do total_concordant+total_discordant here which is
595-
# equivalent to the C(n, 2), the total # of pairs,
596-
# listed on wikipedia
597-
kendall_tau = (total_concordant - total_discordant) / \
598-
(total_concordant + total_discordant)
599-
result[xi, yi] = kendall_tau
600-
result[yi, xi] = kendall_tau
601-
602-
if mask[:, xi].sum() > minp:
603-
result[xi, xi] = 1
604-
else:
605-
result[xi, xi] = NaN
606-
607-
return result
608-
609-
610518
# ----------------------------------------------------------------------
611519

612520
ctypedef fused algos_t:

pandas/core/frame.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9385,7 +9385,8 @@ def corr(
93859385
regardless of the callable's behavior.
93869386
min_periods : int, optional
93879387
Minimum number of observations required per pair of columns
9388-
to have a valid result.
9388+
to have a valid result. Currently only available for Pearson
9389+
and Spearman correlation.
93899390
93909391
Returns
93919392
-------
@@ -9419,9 +9420,7 @@ def corr(
94199420
correl = libalgos.nancorr(mat, minp=min_periods)
94209421
elif method == "spearman":
94219422
correl = libalgos.nancorr_spearman(mat, minp=min_periods)
9422-
elif method == "kendall":
9423-
correl = libalgos.nancorr_kendall(mat, minp=min_periods)
9424-
elif callable(method):
9423+
elif method == "kendall" or callable(method):
94259424
if min_periods is None:
94269425
min_periods = 1
94279426
mat = mat.T

0 commit comments

Comments
 (0)