diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 4cbb2a6c61..e1fb5bedca 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -26,6 +26,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - `math.logsumexp` now matches `scipy.special.logsumexp` when arrays contain infinite values (see [#4360](https://github.com/pymc-devs/pymc3/pull/4360)). - Fixed mathematical formulation in `MvStudentT` random method. (see [#4359](https://github.com/pymc-devs/pymc3/pull/4359)) - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367)) +- Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368)) ## PyMC3 3.10.0 (7 December 2020) diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 68d0ff4026..a90d239389 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -1445,7 +1445,7 @@ class MatrixNormal(Continuous): .. math:: f(x \mid \mu, U, V) = - \frac{1}{(2\pi |U|^n |V|^m)^{1/2}} + \frac{1}{(2\pi^{m n} |U|^n |V|^m)^{1/2}} \exp\left\{ -\frac{1}{2} \mathrm{Tr}[ V^{-1} (x-\mu)^{\prime} U^{-1} (x-\mu)] \right\} @@ -1637,27 +1637,19 @@ def random(self, point=None, size=None): mu, colchol, rowchol = draw_values( [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size ) - if size is None: - size = () - if size in (None, ()): - standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1])) - samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T)) - else: - samples = [] - size = tuple(np.atleast_1d(size)) - if mu.shape == tuple(self.shape): - for _ in range(np.prod(size)): - standard_normal = np.random.standard_normal((self.shape[0], colchol.shape[-1])) - samples.append(mu + np.matmul(rowchol, np.matmul(standard_normal, colchol.T))) - else: - for j in range(np.prod(size)): - standard_normal = np.random.standard_normal( - (self.shape[0], colchol[j].shape[-1]) - ) - samples.append( - mu[j] + np.matmul(rowchol[j], np.matmul(standard_normal, colchol[j].T)) - ) - samples = np.array(samples).reshape(size + tuple(self.shape)) + size = to_tuple(size) + dist_shape = to_tuple(self.shape) + output_shape = size + dist_shape + + # Broadcasting all parameters + (mu,) = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size) + rowchol = np.broadcast_to(rowchol, shape=size + rowchol.shape[-2:]) + + colchol = np.broadcast_to(colchol, shape=size + colchol.shape[-2:]) + colchol = np.swapaxes(colchol, -1, -2) # Take transpose + + standard_normal = np.random.standard_normal(output_shape) + samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol)) return samples def _trquaddist(self, value): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 55ea76a664..8d19836174 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -849,6 +849,12 @@ def ref_rand_chol(size, mu, rowchol, colchol): size, mu, rowcov=np.dot(rowchol, rowchol.T), colcov=np.dot(colchol, colchol.T) ) + def ref_rand_chol_transpose(size, mu, rowchol, colchol): + colchol = colchol.T + return ref_rand( + size, mu, rowcov=np.dot(rowchol, rowchol.T), colcov=np.dot(colchol, colchol.T) + ) + def ref_rand_uchol(size, mu, rowchol, colchol): return ref_rand( size, mu, rowcov=np.dot(rowchol.T, rowchol), colcov=np.dot(colchol.T, colchol) @@ -858,7 +864,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol): pymc3_random( pm.MatrixNormal, {"mu": RealMatrix(n, n), "rowcov": PdMatrix(n), "colcov": PdMatrix(n)}, - size=n, + size=100, valuedomain=RealMatrix(n, n), ref_rand=ref_rand, ) @@ -867,7 +873,7 @@ def ref_rand_uchol(size, mu, rowchol, colchol): pymc3_random( pm.MatrixNormal, {"mu": RealMatrix(n, n), "rowchol": PdMatrixChol(n), "colchol": PdMatrixChol(n)}, - size=n, + size=100, valuedomain=RealMatrix(n, n), ref_rand=ref_rand_chol, ) @@ -878,6 +884,22 @@ def ref_rand_uchol(size, mu, rowchol, colchol): # extra_args={'lower': False} # ) + # 2 sample test fails because cov becomes different if chol is transposed beforehand. + # This implicity means we need transpose of chol after drawing values in + # MatrixNormal.random method to match stats.matrix_normal.rvs method + with pytest.raises(AssertionError): + pymc3_random( + pm.MatrixNormal, + { + "mu": RealMatrix(n, n), + "rowchol": PdMatrixChol(n), + "colchol": PdMatrixChol(n), + }, + size=100, + valuedomain=RealMatrix(n, n), + ref_rand=ref_rand_chol_transpose, + ) + def test_kronecker_normal(self): def ref_rand(size, mu, covs, sigma): cov = pm.math.kronecker(covs[0], covs[1]).eval() @@ -1675,3 +1697,26 @@ def test_issue_3706(self): prior_pred = pm.sample_prior_predictive(1) assert prior_pred["X"].shape == (1, N, 2) + + +def test_matrix_normal_random_with_random_variables(): + """ + This test checks for shape correctness when using MatrixNormal distribution + with parameters as random variables. + Originally reported - https://github.com/pymc-devs/pymc3/issues/3585 + """ + K = 3 + D = 15 + mu_0 = np.zeros((D, K)) + lambd = 1.0 + with pm.Model() as model: + sd_dist = pm.HalfCauchy.dist(beta=2.5) + packedL = pm.LKJCholeskyCov("packedL", eta=2, n=D, sd_dist=sd_dist) + L = pm.expand_packed_triangular(D, packedL, lower=True) + Sigma = pm.Deterministic("Sigma", L.dot(L.T)) # D x D covariance + mu = pm.MatrixNormal( + "mu", mu=mu_0, rowcov=(1 / lambd) * Sigma, colcov=np.eye(K), shape=(D, K) + ) + prior = pm.sample_prior_predictive(2) + + assert prior["mu"].shape == (2, D, K)