diff --git a/examples/case_studies/baseball.py b/examples/case_studies/baseball.py
deleted file mode 100644
index f861099a7..000000000
--- a/examples/case_studies/baseball.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Demonstrates the usage of hierarchical partial pooling
-# See http://mc-stan.org/documentation/case-studies/pool-binary-trials.html for more details
-#
-
-import numpy as np
-
-import pymc3 as pm
-
-
-def build_model():
-    data = np.loadtxt(
-        pm.get_data("efron-morris-75-data.tsv"), delimiter="\t", skiprows=1, usecols=(2, 3)
-    )
-
-    atbats = pm.floatX(data[:, 0])
-    hits = pm.floatX(data[:, 1])
-
-    N = len(hits)
-
-    # we want to bound kappa from below
-    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
-
-    with pm.Model() as model:
-        phi = pm.Uniform("phi", lower=0.0, upper=1.0)
-        kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5)
-        thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
-        ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits)
-    return model
-
-
-def run(n=2000):
-    model = build_model()
-    with model:
-        trace = pm.sample(n, target_accept=0.99)
-
-    pm.traceplot(trace)
-
-
-if __name__ == "__main__":
-    run()
diff --git a/examples/case_studies/disaster_model.py b/examples/case_studies/disaster_model.py
deleted file mode 100644
index 432c5efc3..000000000
--- a/examples/case_studies/disaster_model.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""
-A model for the disasters data with a changepoint
-
-switchpoint ~ U(1851, 1962)
-early_mean ~ Exp(1.)
-late_mean ~ Exp(1.)
-disasters[t] ~ Poisson(early_mean if t <= switchpoint else late_mean)
-
-"""
-
-import arviz as az
-import theano.tensor as tt
-
-from numpy import arange, array
-
-import pymc3 as pm
-
-__all__ = ["disasters_data", "switchpoint", "early_mean", "late_mean", "rate", "disasters"]
-
-
-# Time series of recorded coal mining disasters in the UK from 1851 to 1962
-disasters_data = array(
-    [
-        4,
-        5,
-        4,
-        0,
-        1,
-        4,
-        3,
-        4,
-        0,
-        6,
-        3,
-        3,
-        4,
-        0,
-        2,
-        6,
-        3,
-        3,
-        5,
-        4,
-        5,
-        3,
-        1,
-        4,
-        4,
-        1,
-        5,
-        5,
-        3,
-        4,
-        2,
-        5,
-        2,
-        2,
-        3,
-        4,
-        2,
-        1,
-        3,
-        2,
-        2,
-        1,
-        1,
-        1,
-        1,
-        3,
-        0,
-        0,
-        1,
-        0,
-        1,
-        1,
-        0,
-        0,
-        3,
-        1,
-        0,
-        3,
-        2,
-        2,
-        0,
-        1,
-        1,
-        1,
-        0,
-        1,
-        0,
-        1,
-        0,
-        0,
-        0,
-        2,
-        1,
-        0,
-        0,
-        0,
-        1,
-        1,
-        0,
-        2,
-        3,
-        3,
-        1,
-        1,
-        2,
-        1,
-        1,
-        1,
-        1,
-        2,
-        4,
-        2,
-        0,
-        0,
-        1,
-        4,
-        0,
-        0,
-        0,
-        1,
-        0,
-        0,
-        0,
-        0,
-        0,
-        1,
-        0,
-        0,
-        1,
-        0,
-        1,
-    ]
-)
-year = arange(1851, 1962)
-
-with pm.Model() as model:
-    switchpoint = pm.DiscreteUniform("switchpoint", lower=year.min(), upper=year.max())
-    early_mean = pm.Exponential("early_mean", lam=1.0)
-    late_mean = pm.Exponential("late_mean", lam=1.0)
-
-    # Allocate appropriate Poisson rates to years before and after current
-    # switchpoint location
-    rate = tt.switch(switchpoint >= year, early_mean, late_mean)
-
-    disasters = pm.Poisson("disasters", rate, observed=disasters_data)
-
-    # Initial values for stochastic nodes
-    start = {"early_mean": 2.0, "late_mean": 3.0}
-
-    tr = pm.sample(1000, tune=500, start=start, cores=1)
-    az.plot_trace(tr)
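
The script above is written against the PyMC3/Theano API (tt.switch, the start= keyword, pm.traceplot). For comparison, a minimal sketch of the same changepoint model — assuming the current PyMC >= 5 API, where pm.math.switch replaces tt.switch and initvals replaces start=; the data series is truncated here for brevity:

import arviz as az
import numpy as np
import pymc as pm

# First ten values of the disaster series above; the full 111-value array goes here
disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6])
year = np.arange(1851, 1851 + len(disasters_data))

with pm.Model() as model:
    switchpoint = pm.DiscreteUniform("switchpoint", lower=year.min(), upper=year.max())
    early_mean = pm.Exponential("early_mean", lam=1.0)
    late_mean = pm.Exponential("late_mean", lam=1.0)

    # Years at or before the switchpoint get early_mean, later years get late_mean
    rate = pm.math.switch(switchpoint >= year, early_mean, late_mean)
    disasters = pm.Poisson("disasters", rate, observed=disasters_data)

    # initvals replaces the old start= keyword
    idata = pm.sample(1000, tune=500, initvals={"early_mean": 2.0, "late_mean": 3.0}, cores=1)

az.plot_trace(idata)
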
-""" - -import arviz as az -import theano.tensor as tt - -from numpy import arange, array, empty -from theano.compile.ops import as_op - -import pymc3 as pm - -__all__ = ["disasters_data", "switchpoint", "early_mean", "late_mean", "rate", "disasters"] - -# Time series of recorded coal mining disasters in the UK from 1851 to 1962 -disasters_data = array( - [ - 4, - 5, - 4, - 0, - 1, - 4, - 3, - 4, - 0, - 6, - 3, - 3, - 4, - 0, - 2, - 6, - 3, - 3, - 5, - 4, - 5, - 3, - 1, - 4, - 4, - 1, - 5, - 5, - 3, - 4, - 2, - 5, - 2, - 2, - 3, - 4, - 2, - 1, - 3, - 2, - 2, - 1, - 1, - 1, - 1, - 3, - 0, - 0, - 1, - 0, - 1, - 1, - 0, - 0, - 3, - 1, - 0, - 3, - 2, - 2, - 0, - 1, - 1, - 1, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 2, - 1, - 0, - 0, - 0, - 1, - 1, - 0, - 2, - 3, - 3, - 1, - 1, - 2, - 1, - 1, - 1, - 1, - 2, - 4, - 2, - 0, - 0, - 1, - 4, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 1, - 0, - 1, - ] -) -years = len(disasters_data) - - -@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector]) -def rate_(switchpoint, early_mean, late_mean): - out = empty(years) - out[:switchpoint] = early_mean - out[switchpoint:] = late_mean - return out - - -with pm.Model() as model: - # Prior for distribution of switchpoint location - switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years) - # Priors for pre- and post-switch mean number of disasters - early_mean = pm.Exponential("early_mean", lam=1.0) - late_mean = pm.Exponential("late_mean", lam=1.0) - - # Allocate appropriate Poisson rates to years before and after current - # switchpoint location - idx = arange(years) - rate = rate_(switchpoint, early_mean, late_mean) - - # Data likelihood - disasters = pm.Poisson("disasters", rate, observed=disasters_data) - - # Use slice sampler for means - step1 = pm.Slice([early_mean, late_mean]) - # Use Metropolis for switchpoint, since it accommodates discrete variables - step2 = pm.Metropolis([switchpoint]) - - # Initial values for stochastic nodes - start = {"early_mean": 2.0, "late_mean": 3.0} - - tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], cores=1) - az.plot_trace(tr) diff --git a/examples/case_studies/gelman_bioassay.py b/examples/case_studies/gelman_bioassay.py deleted file mode 100644 index 3bead2e26..000000000 --- a/examples/case_studies/gelman_bioassay.py +++ /dev/null @@ -1,30 +0,0 @@ -from numpy import array, ones - -import pymc3 as pm - -# Samples for each dose level -n = 5 * ones(4, dtype=int) -# Log-dose -dose = array([-0.86, -0.3, -0.05, 0.73]) - -with pm.Model() as model: - # Logit-linear model parameters - alpha = pm.Normal("alpha", 0, sigma=100.0) - beta = pm.Normal("beta", 0, sigma=1.0) - - # Calculate probabilities of death - theta = pm.Deterministic("theta", pm.math.invlogit(alpha + beta * dose)) - - # Data likelihood - deaths = pm.Binomial("deaths", n=n, p=theta, observed=[0, 1, 3, 5]) - - -def run(n=1000): - if n == "short": - n = 50 - with model: - pm.sample(n, tune=1000) - - -if __name__ == "__main__": - run() diff --git a/examples/case_studies/gelman_schools.py b/examples/case_studies/gelman_schools.py deleted file mode 100644 index 4066e9320..000000000 --- a/examples/case_studies/gelman_schools.py +++ /dev/null @@ -1,52 +0,0 @@ -import numpy as np - -from arviz import loo -from pymc3 import HalfCauchy, Model, Normal, sample - -"""Original Stan model - -data { - int J; // number of schools - real y[J]; // estimated treatment effects - real sigma[J]; // s.e. 
diff --git a/examples/case_studies/gelman_schools.py b/examples/case_studies/gelman_schools.py
deleted file mode 100644
index 4066e9320..000000000
--- a/examples/case_studies/gelman_schools.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import numpy as np
-
-from arviz import loo
-from pymc3 import HalfCauchy, Model, Normal, sample
-
-"""Original Stan model
-
-data {
-    int<lower=0> J; // number of schools
-    real y[J]; // estimated treatment effects
-    real<lower=0> sigma[J]; // s.e. of effect estimates
-}
-parameters {
-    real mu;
-    real<lower=0> tau;
-    real eta[J];
-}
-transformed parameters {
-    real theta[J];
-    for (j in 1:J)
-        theta[j] <- mu + tau * eta[j];
-}
-model {
-    eta ~ normal(0, 1);
-    y ~ normal(theta, sigma);
-}
-"""
-
-J = 8
-y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
-sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])
-
-with Model() as schools:
-    eta = Normal("eta", 0, 1, shape=J)
-    mu = Normal("mu", 0, sigma=1e6)
-    tau = HalfCauchy("tau", 25)
-
-    theta = mu + tau * eta
-
-    obs = Normal("obs", theta, sigma=sigma, observed=y)
-
-
-def run(n=1000):
-    if n == "short":
-        n = 50
-    with schools:
-        tr = sample(n)
-        loo(tr)
-
-
-if __name__ == "__main__":
-    run()
diff --git a/examples/case_studies/lasso_missing.py b/examples/case_studies/lasso_missing.py
deleted file mode 100644
index 1757811ed..000000000
--- a/examples/case_studies/lasso_missing.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import pandas as pd
-
-from numpy.ma import masked_values
-
-import pymc3 as pm
-
-# Import data, filling missing values with sentinels (-999)
-test_scores = pd.read_csv(pm.get_data("test_scores.csv")).fillna(-999)
-
-# Extract variables: test score, gender, number of siblings, previous disability,
-# age, mother with HS education or better, and hearing loss identified by
-# 3 months of age
-(score, male, siblings, disability, age, mother_hs, early_ident) = (
-    test_scores[["score", "male", "siblings", "prev_disab", "age_test", "mother_hs", "early_ident"]]
-    .astype(float)
-    .values.T
-)
-
-with pm.Model() as model:
-    # Impute missing values
-    sib_mean = pm.Exponential("sib_mean", 1.0)
-    siblings_imp = pm.Poisson("siblings_imp", sib_mean, observed=masked_values(siblings, value=-999))
-
-    p_disab = pm.Beta("p_disab", 1.0, 1.0)
-    disability_imp = pm.Bernoulli(
-        "disability_imp", p_disab, observed=masked_values(disability, value=-999)
-    )
-
-    p_mother = pm.Beta("p_mother", 1.0, 1.0)
-    mother_imp = pm.Bernoulli("mother_imp", p_mother, observed=masked_values(mother_hs, value=-999))
-
-    s = pm.HalfCauchy("s", 5.0, testval=5)
-    beta = pm.Laplace("beta", 0.0, 100.0, shape=7, testval=0.1)
-
-    expected_score = (
-        beta[0]
-        + beta[1] * male
-        + beta[2] * siblings_imp
-        + beta[3] * disability_imp
-        + beta[4] * age
-        + beta[5] * mother_imp
-        + beta[6] * early_ident
-    )
-
-    observed_score = pm.Normal("observed_score", expected_score, s, observed=score)
-
-
-with model:
-    start = pm.find_MAP()
-    step1 = pm.NUTS([beta, s, p_disab, p_mother, sib_mean], scaling=start)
-    step2 = pm.BinaryGibbsMetropolis([mother_imp.missing_values, disability_imp.missing_values])
-
-
-def run(n=5000):
-    if n == "short":
-        n = 100
-    with model:
-        pm.sample(n, step=[step1, step2], start=start)
-
-
-if __name__ == "__main__":
-    run()
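
Two details in the model above are easy to miss: the Laplace prior on beta is what makes this a "lasso" regression (its sharp peak at zero shrinks coefficients, analogous to an L1 penalty), and passing a masked array as observed is what triggers imputation — PyMC3 treats each masked entry as a latent variable and samples it alongside the parameters. A minimal self-contained sketch of that imputation mechanism, using hypothetical toy data:

import numpy as np

from numpy.ma import masked_values

import pymc3 as pm

# Hypothetical toy counts; -999 marks two missing entries
counts = masked_values(np.array([3, 1, -999, 2, -999, 4]), value=-999)

with pm.Model() as toy:
    mu = pm.Exponential("mu", 1.0)
    # PyMC3 turns the masked entries into a latent variable named "obs_missing"
    obs = pm.Poisson("obs", mu, observed=counts)
    trace = pm.sample(500, cores=1)

# Posterior draws for the two imputed entries
print(trace["obs_missing"].mean(axis=0))
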
diff --git a/examples/case_studies/lightspeed_example.py b/examples/case_studies/lightspeed_example.py
deleted file mode 100644
index 061a28617..000000000
--- a/examples/case_studies/lightspeed_example.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import numpy as np
-import arviz as az
-import pymc3 as pm
-
-light_speed = np.array(
-    [
-        28,
-        26,
-        33,
-        24,
-        34,
-        -44,
-        27,
-        16,
-        40,
-        -2,
-        29,
-        22,
-        24,
-        21,
-        25,
-        30,
-        23,
-        29,
-        31,
-        19,
-        24,
-        20,
-        36,
-        32,
-        36,
-        28,
-        25,
-        21,
-        28,
-        29,
-        37,
-        25,
-        28,
-        26,
-        30,
-        32,
-        36,
-        26,
-        30,
-        22,
-        36,
-        23,
-        27,
-        27,
-        28,
-        27,
-        31,
-        27,
-        26,
-        33,
-        26,
-        32,
-        32,
-        24,
-        39,
-        28,
-        24,
-        25,
-        32,
-        25,
-        29,
-        27,
-        28,
-        29,
-        16,
-        23,
-    ]
-)
-
-model_1 = pm.Model()
-
-with model_1:
-    # priors as specified in the Stan model
-    # mu = pm.Uniform('mu', lower=-np.inf, upper=np.inf)
-    # sigma = pm.Uniform('sigma', lower=0, upper=np.inf)
-
-    # using vague (but proper) priors works
-    mu = pm.Uniform("mu", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0)
-    sigma = pm.Uniform("sigma", lower=light_speed.std() / 1000.0, upper=light_speed.std() * 1000.0)
-
-    # define likelihood
-    y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=light_speed)
-
-
-def run(n=5000):
-    with model_1:
-        trace = pm.sample(n)
-
-    az.summary(trace)
-
-
-if __name__ == "__main__":
-    run()
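
The commented-out priors in the script above are improper (uniform over an infinite interval), which pm.Uniform cannot represent; the bounded "vague" priors are a workaround. PyMC3 does ship improper flat priors, so a closer transcription of the Stan model is possible — a sketch, with the caveat that improper priors can make sampling and model comparison fragile, and with the measurement array truncated for brevity:

import numpy as np

import pymc3 as pm

# First ten of the 66 light-speed measurements above; the full array goes here
light_speed = np.array([28, 26, 33, 24, 34, -44, 27, 16, 40, -2])

with pm.Model() as model_flat:
    mu = pm.Flat("mu")  # improper uniform over (-inf, inf)
    sigma = pm.HalfFlat("sigma")  # improper uniform over (0, inf)
    y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=light_speed)
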