From 002955a6db57de5cb49792bf9691ba9dcb25c78b Mon Sep 17 00:00:00 2001 From: Blaise Frederick Date: Tue, 21 Sep 2021 10:25:37 -0400 Subject: [PATCH 1/2] Added caching features to get ~2 order of magnitude speedup. --- nipype/algorithms/icc.py | 51 ++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/nipype/algorithms/icc.py b/nipype/algorithms/icc.py index 06161a6fe6..31d6ff92d4 100644 --- a/nipype/algorithms/icc.py +++ b/nipype/algorithms/icc.py @@ -86,20 +86,41 @@ def _list_outputs(self): return outputs -def ICC_rep_anova(Y): +def ICC_rep_anova(Y, nocache=False): """ the data Y are entered as a 'table' ie subjects are in rows and repeated measures in columns - One Sample Repeated measure ANOVA - Y = XB + E with X = [FaTor / Subjects] - """ - [nb_subjects, nb_conditions] = Y.shape - dfc = nb_conditions - 1 - dfe = (nb_subjects - 1) * dfc - dfr = nb_subjects - 1 + This is a hacked up (but fully compatible) version of ICC_rep_anova + from nipype that caches some very expensive operations that depend + only on the input array shape - if you're going to run the routine + multiple times (like, on every voxel of an image), this gives you a + HUGE speed boost for large input arrays. If you change the dimensions + of Y, it will reinitialize automatially. Set nocache to True to get + the original, much slower behavior. No, actually, don't do that. + """ + global icc_inited + global current_Y_shape + global dfc, dfe, dfr + global nb_subjects, nb_conditions + global x, x0, X + global centerbit + + try: + current_Y_shape + if nocache or (current_Y_shape != Y.shape): + icc_inited = False + except NameError: + icc_inited = False + + if not icc_inited: + [nb_subjects, nb_conditions] = Y.shape + current_Y_shape = Y.shape + dfc = nb_conditions - 1 + dfe = (nb_subjects - 1) * dfc + dfr = nb_subjects - 1 # Compute the repeated measure effect # ------------------------------------ @@ -109,12 +130,14 @@ def ICC_rep_anova(Y): SST = ((Y - mean_Y) ** 2).sum() # create the design matrix for the different levels - x = kron(eye(nb_conditions), ones((nb_subjects, 1))) # sessions - x0 = tile(eye(nb_subjects), (nb_conditions, 1)) # subjects - X = hstack([x, x0]) + if not icc_inited: + x = kron(eye(nb_conditions), ones((nb_subjects, 1))) # sessions + x0 = tile(eye(nb_subjects), (nb_conditions, 1)) # subjects + X = hstack([x, x0]) + centerbit = X @ pinv(X.T @ X, hermitian=True) @ X.T # Sum Square Error - predicted_Y = X @ (pinv(X.T @ X, hermitian=True) @ (X.T @ Y.flatten("F"))) + predicted_Y = centerbit @ Y.flatten("F") residuals = Y.flatten("F") - predicted_Y SSE = (residuals**2).sum() @@ -122,7 +145,7 @@ def ICC_rep_anova(Y): MSE = SSE / dfe - # Sum square session effect - between colums/sessions + # Sum square session effect - between columns/sessions SSC = ((mean(Y, 0) - mean_Y) ** 2).sum() * nb_subjects MSC = SSC / dfc / nb_subjects @@ -139,4 +162,6 @@ def ICC_rep_anova(Y): e_var = MSE # variance of error r_var = (MSR - MSE) / nb_conditions # variance between subjects + icc_inited = True + return ICC, r_var, e_var, session_effect_F, dfc, dfe From ad2249afd0216c2be8067906d8287c31047a25f0 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Thu, 21 Apr 2022 21:37:53 -0400 Subject: [PATCH 2/2] RF: Use LRU cache and simplify ICC_rep_anova calculations --- nipype/algorithms/icc.py | 83 ++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/nipype/algorithms/icc.py b/nipype/algorithms/icc.py index 31d6ff92d4..38f56d6541 100644 --- a/nipype/algorithms/icc.py +++ b/nipype/algorithms/icc.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import os +from functools import lru_cache import numpy as np from numpy import ones, kron, mean, eye, hstack, tile from numpy.linalg import pinv @@ -86,67 +87,61 @@ def _list_outputs(self): return outputs -def ICC_rep_anova(Y, nocache=False): +@lru_cache(maxsize=1) +def ICC_projection_matrix(shape): + nb_subjects, nb_conditions = shape + + x = kron(eye(nb_conditions), ones((nb_subjects, 1))) # sessions + x0 = tile(eye(nb_subjects), (nb_conditions, 1)) # subjects + X = hstack([x, x0]) + return X @ pinv(X.T @ X, hermitian=True) @ X.T + + +def ICC_rep_anova(Y, projection_matrix=None): """ the data Y are entered as a 'table' ie subjects are in rows and repeated measures in columns + One Sample Repeated measure ANOVA + Y = XB + E with X = [FaTor / Subjects] - This is a hacked up (but fully compatible) version of ICC_rep_anova - from nipype that caches some very expensive operations that depend - only on the input array shape - if you're going to run the routine - multiple times (like, on every voxel of an image), this gives you a - HUGE speed boost for large input arrays. If you change the dimensions - of Y, it will reinitialize automatially. Set nocache to True to get - the original, much slower behavior. No, actually, don't do that. + ``ICC_rep_anova`` involves an expensive operation to compute a projection + matrix, which depends only on the shape of ``Y``, which is computed by + calling ``ICC_projection_matrix(Y.shape)``. If arrays of multiple shapes are + expected, it may be worth pre-computing and passing directly as an + argument to ``ICC_rep_anova``. + + If only one ``Y.shape`` will occur, you do not need to explicitly handle + these, as the most recently calculated matrix is cached automatically. + For example, if you are running the same computation on every voxel of + an image, you will see significant speedups. + + If a ``Y`` is passed with a new shape, a new matrix will be calculated + automatically. """ - global icc_inited - global current_Y_shape - global dfc, dfe, dfr - global nb_subjects, nb_conditions - global x, x0, X - global centerbit - - try: - current_Y_shape - if nocache or (current_Y_shape != Y.shape): - icc_inited = False - except NameError: - icc_inited = False - - if not icc_inited: - [nb_subjects, nb_conditions] = Y.shape - current_Y_shape = Y.shape - dfc = nb_conditions - 1 - dfe = (nb_subjects - 1) * dfc - dfr = nb_subjects - 1 + [nb_subjects, nb_conditions] = Y.shape + dfc = nb_conditions - 1 + dfr = nb_subjects - 1 + dfe = dfr * dfc # Compute the repeated measure effect # ------------------------------------ # Sum Square Total - mean_Y = mean(Y) - SST = ((Y - mean_Y) ** 2).sum() - - # create the design matrix for the different levels - if not icc_inited: - x = kron(eye(nb_conditions), ones((nb_subjects, 1))) # sessions - x0 = tile(eye(nb_subjects), (nb_conditions, 1)) # subjects - X = hstack([x, x0]) - centerbit = X @ pinv(X.T @ X, hermitian=True) @ X.T + demeaned_Y = Y - mean(Y) + SST = np.sum(demeaned_Y**2) # Sum Square Error - predicted_Y = centerbit @ Y.flatten("F") - residuals = Y.flatten("F") - predicted_Y - SSE = (residuals**2).sum() - - residuals.shape = Y.shape + if projection_matrix is None: + projection_matrix = ICC_projection_matrix(Y.shape) + residuals = Y.flatten("F") - (projection_matrix @ Y.flatten("F")) + SSE = np.sum(residuals**2) MSE = SSE / dfe # Sum square session effect - between columns/sessions - SSC = ((mean(Y, 0) - mean_Y) ** 2).sum() * nb_subjects + SSC = np.sum(mean(demeaned_Y, 0) ** 2) * nb_subjects MSC = SSC / dfc / nb_subjects session_effect_F = MSC / MSE @@ -162,6 +157,4 @@ def ICC_rep_anova(Y, nocache=False): e_var = MSE # variance of error r_var = (MSR - MSE) / nb_conditions # variance between subjects - icc_inited = True - return ICC, r_var, e_var, session_effect_F, dfc, dfe