@@ -515,98 +515,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
515
515
516
516
return result
517
517
518
-
519
- # ----------------------------------------------------------------------
520
- # Kendall correlation
521
- # Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
522
-
523
@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray:
    """
    Perform kendall correlation on a 2d array

    Parameters
    ----------
    mat : np.ndarray[float64_t, ndim=2]
        Array to compute kendall correlation on
    minp : int, default 1
        Minimum number of observations required per pair of columns
        to have a valid result.

    Returns
    -------
    numpy.ndarray[float64_t, ndim=2]
        Correlation matrix

    Notes
    -----
    For every pair of columns the rows are ordered by the rank of the first
    column, and pairs of rows are then classified by looking at the ranks of
    the second column only.  A pair is concordant when the later row has a
    strictly greater rank; every other pair (including pairs tied in the
    second column) is counted as discordant, so that concordant + discordant
    always equals the total number of pairs C(n, 2).

    An entry is NaN when fewer than ``minp`` rows are finite in both columns,
    or when fewer than two such rows exist (no pair can be formed).
    """
    cdef:
        Py_ssize_t i, j, k, xi, yi, N, K
        ndarray[float64_t, ndim=2] result
        ndarray[float64_t, ndim=2] ranked_mat
        ndarray[uint8_t, ndim=2] mask
        float64_t currj
        ndarray[uint8_t, ndim=1] valid
        ndarray[int64_t] sorted_idxs
        ndarray[float64_t, ndim=1] col
        int64_t n_concordant
        int64_t total_concordant = 0
        int64_t total_discordant = 0
        int64_t n_pairs
        float64_t kendall_tau
        int64_t n_obs

    N, K = (<object>mat).shape

    result = np.empty((K, K), dtype=np.float64)
    mask = np.isfinite(mat)

    # Rank every column once up front; the pair loop only compares ranks.
    ranked_mat = np.empty((N, K), dtype=np.float64)
    for i in range(K):
        ranked_mat[:, i] = rank_1d(mat[:, i])

    for xi in range(K):
        # Reorder all rows (ranks and mask together) by the ranks of column
        # xi, so that walking down column yi visits rows in xi-order.
        sorted_idxs = ranked_mat[:, xi].argsort()
        ranked_mat = ranked_mat[sorted_idxs]
        mask = mask[sorted_idxs]

        for yi in range(xi + 1, K):
            valid = mask[:, xi] & mask[:, yi]
            if valid.sum() < minp:
                result[xi, yi] = NaN
                result[yi, xi] = NaN
                continue

            # Keep only rows that are finite in both columns.
            if not valid.all():
                col = ranked_mat[valid.nonzero()][:, yi]
            else:
                col = ranked_mat[:, yi]

            n_obs = col.shape[0]
            total_concordant = 0
            total_discordant = 0
            for j in range(n_obs - 1):
                currj = col[j]
                # Count rows after j whose rank is strictly greater; the
                # remaining (n_obs - 1 - j - n_concordant) rows after j are
                # treated as discordant.
                n_concordant = 0
                for k in range(j + 1, n_obs):
                    if col[k] > currj:
                        n_concordant += 1
                total_concordant += n_concordant
                total_discordant += (n_obs - 1 - j - n_concordant)

            # total_concordant + total_discordant is C(n, 2), the total
            # number of pairs.  With fewer than two observations there are no
            # pairs and tau is undefined, so emit NaN rather than dividing
            # by zero.
            n_pairs = total_concordant + total_discordant
            if n_pairs == 0:
                kendall_tau = NaN
            else:
                kendall_tau = (
                    <float64_t>(total_concordant - total_discordant) / n_pairs
                )
            result[xi, yi] = kendall_tau
            result[yi, xi] = kendall_tau

        # Diagonal: apply the same ``>= minp`` threshold as the off-diagonal
        # entries, and additionally require at least two observations so
        # that at least one pair exists.
        n_obs = mask[:, xi].sum()
        if n_obs >= minp and n_obs > 1:
            result[xi, xi] = 1
        else:
            result[xi, xi] = NaN

    return result
608
-
609
-
610
518
# ----------------------------------------------------------------------
611
519
612
520
ctypedef fused algos_t:
0 commit comments