From 63f8dac33c2280cca88abddeb3f9a91a0af40541 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 17:08:46 +0200 Subject: [PATCH 01/23] Signature of function has been provided. --- src/correlation.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 8b1aef3c..c0266b98 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -62,6 +62,34 @@ where fn cov(&self, ddof: A) -> Array2 where A: Float + FromPrimitive; + + /// Return the (Pearson correlation coefficients)[https://en.wikipedia.org/wiki/Pearson_correlation_coefficient] + /// for a 2-dimensional array of observations `M`. + /// + /// Let `(r, o)` be the shape of `M`: + /// - `r` is the number of random variables; + /// - `o` is the number of observations we have collected + /// for each random variable. + /// + /// Every column in `M` is an experiment: a single observation for each + /// random variable. + /// Each row in `M` contains all the observations for a certain random variable. + /// + /// The Pearson correlation coefficient of two random variables is defined as: + /// + /// ```text + /// cov(X, Y) + /// rho(X, Y) = ―――――――――――― + /// std(X)std(Y) + /// ``` + /// + /// **Panics** if `M` is empty, if the type cast of `n_observations` + /// from `usize` to `A` fails or if the standard deviation of one of the random + /// variables is zero. + /// ``` + fn pearson_correlation(&self) -> Array2 + where + A: Float + FromPrimitive; } impl CorrelationExt for ArrayBase From 7eb89abfd3708d15f864a4dff270afb07d1505de Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 17:41:09 +0200 Subject: [PATCH 02/23] Added unimplemented sketch to trait implementation. --- src/correlation.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index c0266b98..5a7052c7 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -115,6 +115,13 @@ where let covariance = denoised.dot(&denoised.t()); covariance.mapv_into(|x| x / dof) } + + fn pearson_correlation(&self) -> Array2 + where + A: Float + FromPrimitive, + { + unimplemented!() + } } #[cfg(test)] From da0f2842ee8145b2d841f95027e58545a0e77356 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 18:02:03 +0200 Subject: [PATCH 03/23] Added new test module for pearson correlation tests. --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index 5a7052c7..e92dc95a 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -125,7 +125,7 @@ where } #[cfg(test)] -mod tests { +mod cov_tests { use super::*; use rand; use rand::distributions::Range; From 0d03aa247c718f3ba35819b37aab076d5ef38e9d Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 18:04:34 +0200 Subject: [PATCH 04/23] Added a first test for pearson correlation. --- src/correlation.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index e92dc95a..2bfbebf1 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -218,4 +218,24 @@ mod cov_tests { ) ); } +} + +#[cfg(test)] +mod pearson_correlation_tests { + use super::*; + use rand::distributions::Range; + use ndarray_rand::RandomExt; + + quickcheck! { + fn pearson_correlation_matrix_is_symmetric(bound: f64) -> bool { + let n_random_variables = 3; + let n_observations = 4; + let a = Array::random( + (n_random_variables, n_observations), + Range::new(-bound.abs(), bound.abs()) + ); + let pearson_correlation = a.pearson_correlation(); + pearson_correlation.all_close(&pearson_correlation.t(), 1e-8) + } + } } \ No newline at end of file From dcda861908b2aed4f252039cdbc7383a5add6ce6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 18:18:46 +0200 Subject: [PATCH 05/23] Depending on master branch of ndarray. --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 8cd4f5eb..a434f0e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ ndarray-rand = "0.8" [patch.crates-io] noisy_float = { git = "https://github.com/SergiusIW/noisy_float-rs.git", rev = "c33a94803987475bbd205c9ff5a697af533f9a17" } +ndarray = { git = "https://github.com/bluss/ndarray.git", branch = "master" } \ No newline at end of file From ee44d36317d9d27d2906390b65893a76147dcc03 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 18:25:35 +0200 Subject: [PATCH 06/23] Basic implementation of pearson_correlation. --- src/correlation.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index 2bfbebf1..3267fac9 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -120,7 +120,15 @@ where where A: Float + FromPrimitive, { - unimplemented!() + let observation_axis = Axis(1); + // The ddof value doesn't matter, as long as we use the same one + // for computing covariance and standard deviation + let ddof = A::one(); + let cov = self.cov(ddof); + let std = self.std_axis(observation_axis, ddof).insert_axis(observation_axis); + let std_matrix = std.dot(&std.t()); + // element-wise division + cov / std_matrix } } From 031ac643a7f98aa45ec4924f82fa5d2396698c65 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 24 Sep 2018 18:28:23 +0200 Subject: [PATCH 07/23] Improved docstring for pearson_correlation. --- src/correlation.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 3267fac9..b8cd1b2b 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -82,6 +82,11 @@ where /// rho(X, Y) = ―――――――――――― /// std(X)std(Y) /// ``` + /// + /// Let `R` be the matrix returned by this function. Then + /// ```text + /// R_ij = rho(X_i, X_j) + /// ``` /// /// **Panics** if `M` is empty, if the type cast of `n_observations` /// from `usize` to `A` fails or if the standard deviation of one of the random From 63675791686c758dbe46b3ae6b62a5518155a1f6 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:03:30 +0100 Subject: [PATCH 08/23] Changed test name - no need to repeat, given test module name. --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index dcad63e6..30aee9f4 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -257,7 +257,7 @@ mod pearson_correlation_tests { use ndarray_rand::RandomExt; quickcheck! { - fn pearson_correlation_matrix_is_symmetric(bound: f64) -> bool { + fn output_matrix_is_symmetric(bound: f64) -> bool { let n_random_variables = 3; let n_observations = 4; let a = Array::random( From ec457c832a522de89ce0547489b27c5e814851a0 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:21:29 +0100 Subject: [PATCH 09/23] Check what happens with constant random variables. --- src/correlation.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 30aee9f4..da5cf9f5 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -267,5 +267,13 @@ mod pearson_correlation_tests { let pearson_correlation = a.pearson_correlation(); pearson_correlation.all_close(&pearson_correlation.t(), 1e-8) } + + fn constant_random_variables_have_nan_correlation(value: f64) -> bool { + let n_random_variables = 3; + let n_observations = 4; + let a = Array::from_elem((n_random_variables, n_observations), value); + let pearson_correlation = a.pearson_correlation(); + pearson_correlation.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag) + } } } \ No newline at end of file From 678c09e17f5d329a090b4e0e6a4e77c824ced961 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:30:08 +0100 Subject: [PATCH 10/23] Removed double ndarray patch. --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 187cebf1..20bb68cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,5 +15,4 @@ ndarray-rand = "0.8" [patch.crates-io] ndarray = { git = "https://github.com/jturner314/ndarray.git", branch = "master" } -noisy_float = { git = "https://github.com/SergiusIW/noisy_float-rs.git", rev = "c33a94803987475bbd205c9ff5a697af533f9a17" } -ndarray = { git = "https://github.com/bluss/ndarray.git", branch = "master" } \ No newline at end of file +noisy_float = { git = "https://github.com/SergiusIW/noisy_float-rs.git", rev = "c33a94803987475bbd205c9ff5a697af533f9a17" } \ No newline at end of file From 29b763f4697f3d08299e8586dc63dd29232617bb Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:30:34 +0100 Subject: [PATCH 11/23] Changed ddof in pearson to avoid panic. --- src/correlation.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index da5cf9f5..781b90c9 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -129,7 +129,9 @@ where let observation_axis = Axis(1); // The ddof value doesn't matter, as long as we use the same one // for computing covariance and standard deviation - let ddof = A::one(); + // We choose -1 to avoid panicking when we only have one + // observation per random variable (or no observations at all) + let ddof = -A::one(); let cov = self.cov(ddof); let std = self.std_axis(observation_axis, ddof).insert_axis(observation_axis); let std_matrix = std.dot(&std.t()); @@ -276,4 +278,27 @@ mod pearson_correlation_tests { pearson_correlation.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag) } } + + #[test] + fn test_zero_variables() { + let a = Array2::::zeros((0, 2)); + let pearson_correlation = a.pearson_correlation(); + assert_eq!(pearson_correlation.shape(), &[0, 0]); + } + + #[test] + fn test_zero_observations() { + let a = Array2::::zeros((2, 0)); + let pearson = a.pearson_correlation(); + assert_eq!(pearson.shape(), &[2, 2]); + let all_nan_flag = pearson.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag); + assert_eq!(all_nan_flag, true); + } + + #[test] + fn test_zero_variables_zero_observations() { + let a = Array2::::zeros((0, 0)); + let pearson = a.pearson_correlation(); + assert_eq!(pearson.shape(), &[0, 0]); + } } \ No newline at end of file From 2b6e5de7d026819a05102be04cee17d97a5322d9 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:32:30 +0100 Subject: [PATCH 12/23] Fixed docs for Pearson Correlation - zero division panic. --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index 781b90c9..39d8287e 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -91,7 +91,7 @@ where /// /// **Panics** if `M` is empty, if the type cast of `n_observations` /// from `usize` to `A` fails or if the standard deviation of one of the random - /// variables is zero. + /// variables is zero and division by zero panics for type A. /// ``` fn pearson_correlation(&self) -> Array2 where From 9d8e6521aca3b4f83bbdbcb30b4482eb2084c86f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:43:47 +0100 Subject: [PATCH 13/23] Test Pearson correlation on random input. --- src/correlation.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 39d8287e..a0e3f58f 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -301,4 +301,31 @@ mod pearson_correlation_tests { let pearson = a.pearson_correlation(); assert_eq!(pearson.shape(), &[0, 0]); } + + #[test] + fn test_for_random_array() { + let a = array![ + [0.16351516, 0.56863268, 0.16924196, 0.72579120], + [0.44342453, 0.19834387, 0.25411802, 0.62462382], + [0.97162731, 0.29958849, 0.17338142, 0.80198342], + [0.91727132, 0.79817799, 0.62237124, 0.38970998], + [0.26979716, 0.20887228, 0.95454999, 0.96290785] + ]; + let numpy_corrcoeff = array![ + [ 1. , 0.38089376, 0.08122504, -0.59931623, 0.1365648 ], + [ 0.38089376, 1. , 0.80918429, -0.52615195, 0.38954398], + [ 0.08122504, 0.80918429, 1. , 0.07134906, -0.17324776], + [-0.59931623, -0.52615195, 0.07134906, 1. , -0.8743213 ], + [ 0.1365648 , 0.38954398, -0.17324776, -0.8743213 , 1. ] + ]; + println!("{:?}", a.pearson_correlation()); + assert_eq!(a.ndim(), 2); + assert!( + a.pearson_correlation().all_close( + &numpy_corrcoeff, + 1e-7 + ) + ); + } + } \ No newline at end of file From 2615f71ba6fc0da8fa18955c25ecf190581586a7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 16:46:44 +0100 Subject: [PATCH 14/23] Remove println statement in test. --- src/correlation.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index a0e3f58f..1f960162 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -318,7 +318,6 @@ mod pearson_correlation_tests { [-0.59931623, -0.52615195, 0.07134906, 1. , -0.8743213 ], [ 0.1365648 , 0.38954398, -0.17324776, -0.8743213 , 1. ] ]; - println!("{:?}", a.pearson_correlation()); assert_eq!(a.ndim(), 2); assert!( a.pearson_correlation().all_close( From aae6984daa1483dcbf2d3ba10782d0f3941fe858 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 17:38:29 +0100 Subject: [PATCH 15/23] Debugging. --- src/correlation.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/correlation.rs b/src/correlation.rs index 1f960162..5c9d6ebb 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -291,6 +291,7 @@ mod pearson_correlation_tests { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); assert_eq!(pearson.shape(), &[2, 2]); + prinln!("{:?}", pearson); let all_nan_flag = pearson.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag); assert_eq!(all_nan_flag, true); } From 2a7eb1ea771e1ccca177fb73a19d4c635fdfcda7 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 17:38:47 +0100 Subject: [PATCH 16/23] Fix typo --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index 5c9d6ebb..9185bfee 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -291,7 +291,7 @@ mod pearson_correlation_tests { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); assert_eq!(pearson.shape(), &[2, 2]); - prinln!("{:?}", pearson); + println!("{:?}", pearson); let all_nan_flag = pearson.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag); assert_eq!(all_nan_flag, true); } From cea5208fbae226ea76f69a0fd06f33e78c0d026a Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Sat, 29 Sep 2018 18:29:49 +0100 Subject: [PATCH 17/23] Adding assertion in zero_observations case for covariance. --- src/correlation.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index 9185bfee..f1366ff2 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -196,7 +196,7 @@ mod cov_tests { // Negative ddof (-1 < 0) to avoid invalid-ddof panic let cov = a.cov(-1.); assert_eq!(cov.shape(), &[2, 2]); - cov.mapv(|x| x.is_nan()); + assert!(cov.all_close(&Array2::::zeros((2, 2)), 1e-8)); } #[test] @@ -290,8 +290,6 @@ mod pearson_correlation_tests { fn test_zero_observations() { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); - assert_eq!(pearson.shape(), &[2, 2]); - println!("{:?}", pearson); let all_nan_flag = pearson.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag); assert_eq!(all_nan_flag, true); } From bb09d7b627dd0de4fc35017fdcaf374ce0cf7d03 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:21:10 +0100 Subject: [PATCH 18/23] Simplified one test. --- src/correlation.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index b36ad56c..df1d05b1 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -290,8 +290,7 @@ mod pearson_correlation_tests { fn test_zero_observations() { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); - let all_nan_flag = pearson.iter().map(|x| x.is_nan()).fold(true, |acc, flag| acc & flag); - assert_eq!(all_nan_flag, true); + pearson.mapv(|x| x.is_nan()) } #[test] From ab9c0f2ed4623e392794544ee5848964b914541f Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:26:27 +0100 Subject: [PATCH 19/23] Fix typo in test --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index df1d05b1..2a45a8ea 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -290,7 +290,7 @@ mod pearson_correlation_tests { fn test_zero_observations() { let a = Array2::::zeros((2, 0)); let pearson = a.pearson_correlation(); - pearson.mapv(|x| x.is_nan()) + pearson.mapv(|x| x.is_nan()); } #[test] From d8c3001cb45901bc8aeba9588534e6934ae41479 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:34:13 +0100 Subject: [PATCH 20/23] Fixing documentation typo --- src/correlation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/correlation.rs b/src/correlation.rs index 2a45a8ea..b79b792f 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -64,7 +64,7 @@ where where A: Float + FromPrimitive; - /// Return the (Pearson correlation coefficients)[https://en.wikipedia.org/wiki/Pearson_correlation_coefficient] + /// Return the [Pearson correlation coefficients](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) /// for a 2-dimensional array of observations `M`. /// /// Let `(r, o)` be the shape of `M`: From 36cdeabb17284868bd4ce0b3df96e3623bddac8b Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:35:40 +0100 Subject: [PATCH 21/23] Adding docs for the whole trait --- src/correlation.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index b79b792f..18db3adb 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -2,6 +2,8 @@ use ndarray::prelude::*; use ndarray::Data; use num_traits::{Float, FromPrimitive}; +/// Extension trait for ArrayBase providing functions +/// to compute different correlation measures. pub trait CorrelationExt where S: Data, From c5c7f5910ca054ae8d25cfed983b6be43eee1fd3 Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:40:51 +0100 Subject: [PATCH 22/23] Added doc-test to pearson correlation method. --- src/correlation.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 18db3adb..136adf75 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -95,6 +95,24 @@ where /// from `usize` to `A` fails or if the standard deviation of one of the random /// variables is zero and division by zero panics for type A. /// ``` + /// extern crate ndarray; + /// extern crate ndarray_stats; + /// use ndarray::arr2; + /// use ndarray_stats::CorrelationExt; + /// + /// let a = arr2(&[[1., 3., 5.], + /// [2., 4., 6.]]); + /// let corr = a.pearson_correlation(); + /// assert!( + /// corr.all_close( + /// &arr2(&[ + /// [1., 1.], + /// [1., 1.], + /// ]), + /// 1e-7 + /// ) + /// ); + /// ``` fn pearson_correlation(&self) -> Array2 where A: Float + FromPrimitive; From da58d0f16df77d3f003efc42f5cca19c6aa29aac Mon Sep 17 00:00:00 2001 From: LukeMathWalker Date: Mon, 1 Oct 2018 09:41:54 +0100 Subject: [PATCH 23/23] Added Example header before doc-test --- src/correlation.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/correlation.rs b/src/correlation.rs index 136adf75..2b8ebd74 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -93,6 +93,9 @@ where /// /// **Panics** if `M` is empty, if the type cast of `n_observations` /// from `usize` to `A` fails or if the standard deviation of one of the random + /// + /// # Example + /// /// variables is zero and division by zero panics for type A. /// ``` /// extern crate ndarray;