Closed
Description
Computing a rolling standard deviation with `np.nanstd` is both computationally and memory intensive for large arrays. There is a faster implementation using convolutions:
def nanstd_1d(a, w):
    """Rolling population standard deviation of ``a`` over windows of
    width ``w``, ignoring NaNs, computed with convolutions.

    Parameters
    ----------
    a : 1-D float array, may contain NaN.
    w : int, window width (1 <= w <= a.size).

    Returns
    -------
    1-D array of length ``a.size - w + 1`` where element ``i`` equals
    ``np.nanstd(a[i:i+w])``; windows that are entirely NaN yield NaN.
    """
    k = np.ones(w, dtype=int)
    valid = ~np.isnan(a)                      # True where the sample is usable
    a0 = np.where(valid, a, 0.0)              # zero out NaNs so sums skip them
    n = np.convolve(valid, k, 'valid')        # valid-sample count per window
    s1 = np.convolve(a0, k, 'valid')          # per-window sum
    # NOTE: the original multiplied a0**2 by the mask again -- redundant,
    # since a0 is already 0 wherever the mask is False.
    s2 = np.convolve(a0 * a0, k, 'valid')     # per-window sum of squares
    with np.errstate(divide='ignore', invalid='ignore'):
        # var = E[x^2] - E[x]^2; all-NaN windows (n == 0) become NaN here.
        var = s2 / n - (s1 / n) ** 2
    # Float round-off can push var to a tiny negative value for constant
    # windows, which would turn sqrt into NaN -- clamp at 0.  np.maximum
    # propagates NaN, so all-NaN windows stay NaN as np.nanstd would.
    return np.sqrt(np.maximum(var, 0.0))
def rolling_nanstd(a, m, axis=0):
    """Apply ``nanstd_1d`` along one axis of ``a``.

    Parameters
    ----------
    a : float array of any dimensionality.
    m : int, rolling window width.
    axis : int, axis *before* the rolling shift; 1 is subtracted so the
        default (0) targets the last axis.  Pass ``a.ndim`` to roll along
        the last axis of a multi-dimensional array.

    Returns
    -------
    Array whose rolled axis shrinks to ``a.shape[axis-1] - m + 1``.
    """
    axis = axis - 1  # Account for rolling: 0 -> -1 (last axis).
    # Pass nanstd_1d directly and its window argument positionally; the
    # original wrapped it in a lambda (pure call overhead) and could not
    # have used m=m as a kwarg since nanstd_1d names that parameter w.
    return np.apply_along_axis(nanstd_1d, axis, a, m)
# Demo 1: 1-D series of length 100 with a window of 50; the default
# axis=0 is shifted to -1 internally, which targets the only axis.
T = np.random.rand(100)
m = 50
out_std = rolling_nanstd(T, m)
# Demo 2: 2-D input -- 5 independent rows of 1M samples each; passing
# axis=T.ndim rolls along the last axis (rolling_nanstd subtracts 1).
T = np.random.rand(5, 1_000_000)
m = 50
out_std = rolling_nanstd(T, m, axis=T.ndim)
This implementation is around 5x faster than `np.nanstd` and uses about 5-10x less memory.