From 70323fc1339ba02da7234d8e430b3121d9f14bfe Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Thu, 19 Jan 2017 11:51:31 -0600 Subject: [PATCH 1/7] ENH: add param_names attribute to most distributions --- pymc3/distributions/continuous.py | 15 +++++++++++++-- pymc3/distributions/discrete.py | 13 ++++++++++++- pymc3/distributions/timeseries.py | 4 ++++ pymc3/distributions/transforms.py | 4 ++-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 58b4ae0947..690e784ae3 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -126,7 +126,7 @@ class Uniform(Continuous): def __init__(self, lower=0, upper=1, transform='interval', *args, **kwargs): super(Uniform, self).__init__(*args, **kwargs) - + self.param_names = ['lower', 'upper'] self.lower = lower self.upper = upper self.mean = (upper + lower) / 2. @@ -206,7 +206,7 @@ def __init__(self, *args, **kwargs): # called to display a warning we have to fetch the args and # kwargs manually. After a certain period we should revert # back to the old calling signature. - + self.param_names = ['mu', 'sd', 'tau'] if len(args) == 1: mu = args[0] sd = kwargs.pop('sd', None) @@ -273,6 +273,7 @@ class HalfNormal(PositiveContinuous): def __init__(self, sd=None, tau=None, *args, **kwargs): super(HalfNormal, self).__init__(*args, **kwargs) + self.param_names = ['sd', 'tau'] self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.mean = tt.sqrt(2 / (np.pi * self.tau)) self.variance = (1. - 2 / np.pi) / self.tau @@ -352,6 +353,7 @@ class Wald(PositiveContinuous): def __init__(self, mu=None, lam=None, phi=None, alpha=0., *args, **kwargs): super(Wald, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'lam', 'phi', 'alpha'] self.mu, self.lam, self.phi = self.get_mu_lam_phi(mu, lam, phi) self.alpha = alpha self.mean = self.mu + alpha @@ -458,6 +460,7 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): super(Beta, self).__init__(*args, **kwargs) + self.param_names = ['alpha', 'beta', 'mu', 'sd'] alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta @@ -520,6 +523,7 @@ class Exponential(PositiveContinuous): def __init__(self, lam, *args, **kwargs): super(Exponential, self).__init__(*args, **kwargs) + self.param_names = ['lam'] self.lam = lam self.mean = 1. / lam self.median = self.mean * tt.log(2) @@ -565,6 +569,7 @@ class Laplace(Continuous): def __init__(self, mu, b, *args, **kwargs): super(Laplace, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'b'] self.b = b self.mean = self.median = self.mode = self.mu = mu @@ -617,6 +622,7 @@ class Lognormal(PositiveContinuous): def __init__(self, mu=0, sd=None, tau=None, *args, **kwargs): super(Lognormal, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'tau'] self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) @@ -678,6 +684,7 @@ class StudentT(Continuous): def __init__(self, nu, mu=0, lam=None, sd=None, *args, **kwargs): super(StudentT, self).__init__(*args, **kwargs) + self.param_names = ['nu', 'mu', 'lam'] self.nu = nu = tt.as_tensor_variable(nu) self.lam, self.sd = get_tau_sd(tau=lam, sd=sd) self.mean = self.median = self.mode = self.mu = mu @@ -845,6 +852,7 @@ class HalfCauchy(PositiveContinuous): def __init__(self, beta, *args, **kwargs): super(HalfCauchy, self).__init__(*args, **kwargs) + self.param_names = ['beta'] self.mode = 0 self.median = beta self.beta = beta @@ -1257,6 +1265,7 @@ class ExGaussian(Continuous): def __init__(self, mu, sigma, nu, *args, **kwargs): super(ExGaussian, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'sigma', 'nu'] self.mu = mu self.sigma = sigma self.nu = nu @@ -1319,6 +1328,7 @@ class VonMises(Continuous): def __init__(self, mu=0.0, kappa=None, transform='circular', *args, **kwargs): super(VonMises, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'kappa'] self.mean = self.median = self.mode = self.mu = mu self.kappa = kappa self.variance = 1 - i1(kappa) / i0(kappa) @@ -1382,6 +1392,7 @@ class SkewNormal(Continuous): """ def __init__(self, mu=0.0, sd=None, tau=None, alpha=1, *args, **kwargs): super(SkewNormal, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'sd', 'tau', 'alpha'] self.mu = mu self.tau, self.sd = get_tau_sd(tau=tau, sd=sd) self.alpha = alpha diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index edfdfaecb9..efd405c1a6 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -40,6 +40,7 @@ class Binomial(Discrete): def __init__(self, n, p, *args, **kwargs): super(Binomial, self).__init__(*args, **kwargs) + self.param_names = ['n', 'p'] self.n = n self.p = p self.mode = tt.cast(tt.round(n * p), self.dtype) @@ -91,6 +92,7 @@ class BetaBinomial(Discrete): def __init__(self, alpha, beta, n, *args, **kwargs): super(BetaBinomial, self).__init__(*args, **kwargs) + self.param_names = ['n', 'alpha', 'beta'] self.alpha = alpha self.beta = beta self.n = n @@ -147,6 +149,7 @@ class Bernoulli(Discrete): def __init__(self, p, *args, **kwargs): super(Bernoulli, self).__init__(*args, **kwargs) + self.param_names = ['p'] self.p = p self.mode = tt.cast(tt.round(p), 'int8') @@ -193,6 +196,7 @@ class Poisson(Discrete): def __init__(self, mu, *args, **kwargs): super(Poisson, self).__init__(*args, **kwargs) + self.param_names = ['mu'] self.mu = mu self.mode = tt.floor(mu).astype('int32') @@ -240,6 +244,7 @@ class NegativeBinomial(Discrete): def __init__(self, mu, alpha, *args, **kwargs): super(NegativeBinomial, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'alpha'] self.mu = mu self.alpha = alpha self.mode = tt.floor(mu).astype('int32') @@ -289,6 +294,7 @@ class Geometric(Discrete): def __init__(self, p, *args, **kwargs): super(Geometric, self).__init__(*args, **kwargs) + self.param_names = ['p'] self.p = p self.mode = 1 @@ -326,6 +332,7 @@ class DiscreteUniform(Discrete): def __init__(self, lower, upper, *args, **kwargs): super(DiscreteUniform, self).__init__(*args, **kwargs) + self.param_names = ['lower', 'upper'] self.lower = tt.floor(lower).astype('int32') self.upper = tt.floor(upper).astype('int32') self.mode = tt.maximum( @@ -373,6 +380,7 @@ class Categorical(Discrete): def __init__(self, p, *args, **kwargs): super(Categorical, self).__init__(*args, **kwargs) + self.param_names = ['p'] try: self.k = tt.shape(p)[-1].tag.test_value except AttributeError: @@ -419,12 +427,13 @@ class Constant(Discrete): Parameters ---------- - value : float or int + c : float or int Constant parameter. """ def __init__(self, c, *args, **kwargs): super(Constant, self).__init__(*args, **kwargs) + self.param_names = ['c'] self.mean = self.median = self.mode = self.c = c def random(self, point=None, size=None, repeat=None): @@ -479,6 +488,7 @@ class ZeroInflatedPoisson(Discrete): def __init__(self, theta, psi, *args, **kwargs): super(ZeroInflatedPoisson, self).__init__(*args, **kwargs) + self.param_names = ['theta', 'psi'] self.theta = theta self.psi = psi self.pois = Poisson.dist(theta) @@ -530,6 +540,7 @@ class ZeroInflatedNegativeBinomial(Discrete): def __init__(self, mu, alpha, psi, *args, **kwargs): super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs) + self.param_names = ['mu', 'alpha', 'psi'] self.mu = mu self.alpha = alpha self.psi = psi diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index 8dd2e282fb..47d6b6a3b0 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -21,6 +21,7 @@ class AR1(Continuous): def __init__(self, k, tau_e, *args, **kwargs): super(AR1, self).__init__(*args, **kwargs) + self.param_names = ['k', 'tau_e'] self.k = k self.tau_e = tau_e self.tau = tau_e * (1 - k ** 2) @@ -57,6 +58,7 @@ class GaussianRandomWalk(Continuous): def __init__(self, tau=None, init=Flat.dist(), sd=None, mu=0., *args, **kwargs): super(GaussianRandomWalk, self).__init__(*args, **kwargs) + self.param_names = ['tau', 'sd', 'mu'] self.tau = tau self.sd = sd self.mu = mu @@ -101,6 +103,7 @@ class GARCH11(Continuous): def __init__(self, omega=None, alpha_1=None, beta_1=None, initial_vol=None, *args, **kwargs): super(GARCH11, self).__init__(*args, **kwargs) + self.param_names = ['omega', 'alpha_1', 'beta_1', 'initial_vol'] self.omega = omega self.alpha_1 = alpha_1 @@ -141,6 +144,7 @@ class EulerMaruyama(Continuous): """ def __init__(self, dt, sde_fn, sde_pars, *args, **kwds): super(EulerMaruyama, self).__init__(*args, **kwds) + self.param_names = ['dt'] self.dt = dt self.sde_fn = sde_fn self.sde_pars = sde_pars diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 4234d4530b..991e2d0422 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -193,7 +193,7 @@ class StickBreaking(Transform): Parameters ---------- eps : float, positive value - A small value for numerical stability in invlogit. + A small value for numerical stability in invlogit. """ name = "stickbreaking" @@ -250,7 +250,7 @@ def backward(self, y): def forward(self, x): return x - + def jacobian_det(self, x): return 0 From 0897c64d6ae2c9530cb86b72c986de61399ed9fb Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Thu, 19 Jan 2017 11:52:42 -0600 Subject: [PATCH 2/7] ENH: add node and edges to Model.graph attribute each time a variable is added --- pymc3/model.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pymc3/model.py b/pymc3/model.py index d28f2340e0..807983c68c 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,5 +1,6 @@ import threading +import networkx as nx import numpy as np import theano import theano.tensor as tt @@ -93,6 +94,14 @@ def _get_named_nodes(graph, nodes): return nodes +def get_all_parents(end_node, starting_list=[]): + for parent in end_node.owner.get_parents(): + starting_list.append(parent) + if parent.owner is not None: + get_all_parents(parent, starting_list) + return starting_list + + class Context(object): """Functionality for objects that put themselves in a context using the `with` statement. @@ -183,6 +192,7 @@ def __init__(self): self.potentials = [] self.missing_values = [] self.model = self + self.graph = nx.DiGraph() @property @memoize @@ -307,6 +317,18 @@ def Var(self, name, dist, data=None): self.missing_values.append(var.missing_values) self.named_vars[var.missing_values.name] = var.missing_values + self.graph.add_node(var) + for param_name in getattr(dist, "param_names", []): + param_value = getattr(dist, param_name) + owner = getattr(param_value, "owner", None) + if param_value in self.vars: + self.graph.add_edge(param_value, var) + elif owner is not None: + parents = get_all_parents(param_value) + for parent in parents: + if parent in self.vars: + self.graph.add_edge(parent, var) + self.add_random_variable(var) return var From 67276baa1ee9eb2883c9fe971f356f248d7b168d Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Thu, 19 Jan 2017 13:05:42 -0600 Subject: [PATCH 3/7] FIX: remove unneccesary parameter names --- pymc3/distributions/continuous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 690e784ae3..7b344a5f5a 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -460,7 +460,7 @@ def __init__(self, alpha=None, beta=None, mu=None, sd=None, *args, **kwargs): super(Beta, self).__init__(*args, **kwargs) - self.param_names = ['alpha', 'beta', 'mu', 'sd'] + self.param_names = ['alpha', 'beta'] alpha, beta = self.get_alpha_beta(alpha, beta, mu, sd) self.alpha = alpha self.beta = beta From e48d789889d6839d9676a6a3b57afb31ad1c9b39 Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Sat, 21 Jan 2017 21:25:41 -0600 Subject: [PATCH 4/7] MAINT: extract graph construction functionality to pymc3.plots.plot_network function --- pymc3/model.py | 22 ------------- pymc3/plots.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/pymc3/model.py b/pymc3/model.py index 807983c68c..d28f2340e0 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1,6 +1,5 @@ import threading -import networkx as nx import numpy as np import theano import theano.tensor as tt @@ -94,14 +93,6 @@ def _get_named_nodes(graph, nodes): return nodes -def get_all_parents(end_node, starting_list=[]): - for parent in end_node.owner.get_parents(): - starting_list.append(parent) - if parent.owner is not None: - get_all_parents(parent, starting_list) - return starting_list - - class Context(object): """Functionality for objects that put themselves in a context using the `with` statement. @@ -192,7 +183,6 @@ def __init__(self): self.potentials = [] self.missing_values = [] self.model = self - self.graph = nx.DiGraph() @property @memoize @@ -317,18 +307,6 @@ def Var(self, name, dist, data=None): self.missing_values.append(var.missing_values) self.named_vars[var.missing_values.name] = var.missing_values - self.graph.add_node(var) - for param_name in getattr(dist, "param_names", []): - param_value = getattr(dist, param_name) - owner = getattr(param_value, "owner", None) - if param_value in self.vars: - self.graph.add_edge(param_value, var) - elif owner is not None: - parents = get_all_parents(param_value) - for parent in parents: - if parent in self.vars: - self.graph.add_edge(parent, var) - self.add_random_variable(var) return var diff --git a/pymc3/plots.py b/pymc3/plots.py index 6fb518b404..688e33d1bb 100644 --- a/pymc3/plots.py +++ b/pymc3/plots.py @@ -1,12 +1,13 @@ import numpy as np from scipy.stats import kde, mode import matplotlib.pyplot as plt +import networkx as nx import pymc3 as pm from .stats import quantiles, hpd from scipy.signal import gaussian, convolve __all__ = ['traceplot', 'kdeplot', 'kde2plot', - 'forestplot', 'autocorrplot', 'plot_posterior'] + 'forestplot', 'autocorrplot', 'plot_posterior', 'plot_network'] def traceplot(trace, varnames=None, transform=lambda x: x, figsize=None, @@ -783,13 +784,13 @@ def get_trace_dict(tr, varnames): fig.tight_layout() return ax - + def fast_kde(x): """ A fft-based Gaussian kernel density estimate (KDE) for computing the KDE on a regular grid. The code was adapted from https://github.com/mfouesneau/faststats - + Parameters ---------- @@ -801,13 +802,13 @@ def fast_kde(x): grid: A gridded 1D KDE of the input points (x). xmin: minimum value of x xmax: maximum value of x - + """ # add small jitter in case input values are the same x = np.random.normal(x, 1e-12) - + xmin, xmax = x.min(), x.max() - + n = len(x) nx = 256 @@ -839,3 +840,77 @@ def fast_kde(x): grid /= norm_factor return grid, xmin, xmax + + +def _get_all_parents(end_node, starting_list=[]): + for parent in end_node.owner.get_parents(): + starting_list.append(parent) + if parent.owner is not None: + _get_all_parents(parent, starting_list) + return starting_list + + +def plot_network(model, layout='spectral', plot=True, ax=None): + """ + Create and optionally draw a directed graph (network) representation of a + provided model. + + Parameters + ---------- + model : pymc3.Model + The Model object to be coerced into a graph. + layout : str + Layout to use when plotting the graph. Available options are: + 'circular', 'shell', 'spectral', 'spring', 'force-directed', 'random'. + plot : bool + Plots the graph using Matplotlib if True. If False, no plot is created + and only the graph object is returned. + ax : Matplotlib Axes object, optional + Draw the graph in specified Matplotlib axes. + + Returns + ------- + graph : networkx.DiGraph + The graph object extracted from the information in the model. + """ + + available_layouts = { + "circular": nx.circular_layout, + "shell": nx.shell_layout, + "spectral": nx.spectral_layout, + "spring": nx.spring_layout, + "force-directed": nx.fruchterman_reingold_layout, + "random": nx.random_layout, + } + + try: + layout_func = available_layouts[layout] + except KeyError: + msg = (u"The selected layout option ({}) is not available. Please " + u"choose from the following options: {}".format( + layout, available_layouts.keys() + )) + raise KeyError(msg) + + graph = nx.DiGraph() + + variables = model.named_vars.values() + for var in variables: + graph.add_node(var) + dist = var.distribution + for param_name in getattr(dist, "param_names", []): + param_value = getattr(dist, param_name) + owner = getattr(param_value, "owner", None) + if param_value in variables: + graph.add_edge(param_value, var) + elif owner is not None: + parents = _get_all_parents(param_value) + for parent in parents: + if parent in variables: + graph.add_edge(parent, var) + + if plot: + pos = layout_func(graph) + nx.draw_networkx(graph, pos, ax) + + return graph From ec4cbca5ab361e4951a293b3a7acd9ed4ab65eab Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Sat, 21 Jan 2017 21:35:57 -0600 Subject: [PATCH 5/7] MAINT: add networkx to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index fc6625c5d9..6ed778ca8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ pandas>=0.18.0 patsy>=0.4.0 joblib>=0.9 tqdm>=4.8.4 +networkx==1.11.0 CommonMark==0.5.4 recommonmark sphinx From a27e303849f2ba0221d27dd95b4dcc5f046e42f6 Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Sat, 21 Jan 2017 22:00:10 -0600 Subject: [PATCH 6/7] MAINT: add networkx to create_testenv.sh installation list --- scripts/create_testenv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/create_testenv.sh b/scripts/create_testenv.sh index 82df0889d7..e370f56aed 100755 --- a/scripts/create_testenv.sh +++ b/scripts/create_testenv.sh @@ -23,7 +23,7 @@ then source activate testenv fi -conda install --yes pyqt=4.11.4 jupyter pyzmq numpy scipy nose matplotlib pandas Cython patsy statsmodels joblib coverage mkl-service +conda install --yes pyqt=4.11.4 jupyter pyzmq numpy scipy nose matplotlib pandas Cython patsy statsmodels networkx joblib coverage mkl-service if [ ${PYTHON_VERSION} == "2.7" ]; then conda install --yes mock enum34; fi From 66f8081ce45fca433f0f0a226ec547bb2debcffb Mon Sep 17 00:00:00 2001 From: stevenjkern Date: Sun, 12 Feb 2017 22:23:30 -0600 Subject: [PATCH 7/7] ENH: better model creation, tree layout --- pymc3/plots.py | 82 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/pymc3/plots.py b/pymc3/plots.py index 688e33d1bb..87a7f2e1e6 100644 --- a/pymc3/plots.py +++ b/pymc3/plots.py @@ -842,15 +842,67 @@ def fast_kde(x): return grid, xmin, xmax -def _get_all_parents(end_node, starting_list=[]): - for parent in end_node.owner.get_parents(): - starting_list.append(parent) - if parent.owner is not None: - _get_all_parents(parent, starting_list) - return starting_list - - -def plot_network(model, layout='spectral', plot=True, ax=None): +def get_inputs(node, variables): + if node.owner is not None: + for inp in node.owner.inputs: + if inp in variables: + yield inp + elif inp.owner is not None: + for i in get_inputs(inp, variables): + yield i + + +def tree_layout(graph): + if not graph.is_directed(): + raise ValueError('graph must be directed') + + if not nx.is_directed_acyclic_graph(graph): + raise ValueError('graph must not contain cycles') + + roots = [node for node, degree in graph.out_degree_iter() if degree < 1] + + # Find the tree width at every depth in order to layout + # the nodes in a justified manner + depths = {} + for node in graph.nodes(): + depth = 0 + parents = graph.successors(node) + while len(parents) > 0: + _node = parents[0] + parents = graph.successors(_node) + depth += 1 + depths[node] = depth + depths_list = depths.values() + + max_depth = max(depths_list) + widths = [depths_list.count(i) for i in range(max_depth+1)] + max_width = max(widths) + nodes_positioned_at_depth = [0] * len(widths) + + # breadth first tree transversal + midpoint = max_width / 2.0 + depth_positions = [] + for width in widths: + pos = (np.linspace(0, width, width) - + (width / 2.0) + + midpoint).tolist() + depth_positions.append(pos) + positions = {} + assert len(roots) == len(depth_positions[0]) + for root in roots: + root_depth = depths[root] + positions[root] = (depth_positions[root_depth].pop(0), root_depth) + for node in graph.nodes(): + if node not in positions: + node_depth = depths[node] + positions[node] = ( + depth_positions[node_depth].pop(0), node_depth + ) + + return positions + + +def plot_network(model, layout='tree', plot=True, ax=None): """ Create and optionally draw a directed graph (network) representation of a provided model. @@ -859,9 +911,10 @@ def plot_network(model, layout='spectral', plot=True, ax=None): ---------- model : pymc3.Model The Model object to be coerced into a graph. - layout : str + layout : str, default='tree' Layout to use when plotting the graph. Available options are: - 'circular', 'shell', 'spectral', 'spring', 'force-directed', 'random'. + 'circular', 'shell', 'spectral', 'spring', 'force-directed', 'random', + 'tree'. plot : bool Plots the graph using Matplotlib if True. If False, no plot is created and only the graph object is returned. @@ -881,6 +934,7 @@ def plot_network(model, layout='spectral', plot=True, ax=None): "spring": nx.spring_layout, "force-directed": nx.fruchterman_reingold_layout, "random": nx.random_layout, + "tree": tree_layout, } try: @@ -904,13 +958,13 @@ def plot_network(model, layout='spectral', plot=True, ax=None): if param_value in variables: graph.add_edge(param_value, var) elif owner is not None: - parents = _get_all_parents(param_value) - for parent in parents: + for parent in get_inputs(param_value, variables): if parent in variables: graph.add_edge(parent, var) if plot: pos = layout_func(graph) - nx.draw_networkx(graph, pos, ax) + nx.draw_networkx(graph, pos, arrows=True, node_size=1000, node_color='w', font_size=8) + plt.axis('off') return graph