diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 30b637a4d1..6f38590a77 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -476,8 +476,17 @@ class Bound(object):

     Example
     -------
-    boundedNormal = pymc3.Bound(pymc3.Normal, lower=0.0)
-    par = boundedNormal(mu=0.0, sd=1.0, testval=1.0)
+    # A bounded distribution can be defined before the model context
+    PositiveNormal = pm.Bound(pm.Normal, lower=0.0)
+    with pm.Model():
+        par1 = PositiveNormal('par1', mu=0.0, sd=1.0, testval=1.0)
+        # or within the model context
+        NegativeNormal = pm.Bound(pm.Normal, upper=0.0)
+        par2 = NegativeNormal('par2', mu=0.0, sd=1.0, testval=1.0)
+
+        # or it can be defined implicitly within the model context
+        par3 = pm.Bound(pm.Normal, lower=-1.0, upper=1.0)(
+            'par3', mu=0.0, sd=1.0, testval=1.0)
     """

     def __init__(self, distribution, lower=-np.inf, upper=np.inf):
@@ -490,7 +499,8 @@ def __call__(self, *args, **kwargs):
             raise ValueError('Observed Bound distributions are not allowed. '
                              'If you want to model truncated data '
                              'you can use a pm.Potential in combination '
-                             'with the cumulative probability function.')
+                             'with the cumulative probability function. See '
+                             'pymc3/examples/censored_data.py for an example.')
         first, args = args[0], args[1:]

         return Bounded(first, self.distribution, self.lower, self.upper,
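A note on the pm.Potential workaround that the new error message points to: censored or truncated observations enter the likelihood through the log of the probability mass beyond the bound. Below is a minimal sketch of the pattern, assuming an invented right-censoring bound `censored_at` and censored count `n_censored`; the full, numerically stable treatment is in pymc3/examples/censored_data.py, updated further down in this diff.

import numpy as np
import pymc3 as pm
import theano.tensor as tt

def normal_lccdf(mu, sigma, x):
    # log(1 - CDF) of a normal; censored_data.py defines a tail-stable variant
    z = (x - mu) / sigma
    return tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.)

censored_at = 10.                          # hypothetical censoring bound
n_censored = 3                             # hypothetical number of censored points
observed = np.array([4.2, 7.1, 9.9, 6.5])  # hypothetical uncensored values

with pm.Model():
    mu = pm.Normal('mu', mu=0., sd=10.)
    sigma = pm.HalfNormal('sigma', sd=5.)
    pm.Normal('obs', mu=mu, sd=sigma, observed=observed)
    # each censored point contributes log P(X > censored_at) to the likelihood
    pm.Potential('censored', n_censored * normal_lccdf(mu, sigma, censored_at))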
diff --git a/pymc3/examples/LKJ_correlation.py b/pymc3/examples/LKJ_correlation.py
index daac3feada..b2dc0bb1ff 100644
--- a/pymc3/examples/LKJ_correlation.py
+++ b/pymc3/examples/LKJ_correlation.py
@@ -1,59 +1,54 @@
 import theano.tensor as tt
 import numpy as np
 from numpy.random import multivariate_normal
-
 import pymc3 as pm

 # Generate some multivariate normal data:
 n_obs = 1000

 # Mean values:
-mu = np.linspace(0, 2, num=4)
-n_var = len(mu)
+mu_r = np.linspace(0, 2, num=4)
+n_var = len(mu_r)

 # Standard deviations:
 stds = np.ones(4) / 2.0

 # Correlation matrix of 4 variables:
-corr = np.array([[1., 0.75, 0., 0.15],
-                 [0.75, 1., -0.06, 0.19],
-                 [0., -0.06, 1., -0.04],
-                 [0.15, 0.19, -0.04, 1.]])
-cov_matrix = np.diag(stds).dot(corr.dot(np.diag(stds)))
-
-dataset = multivariate_normal(mu, cov_matrix, size=n_obs)
-
+corr_r = np.array([[1., 0.75, 0., 0.15],
+                   [0.75, 1., -0.06, 0.19],
+                   [0., -0.06, 1., -0.04],
+                   [0.15, 0.19, -0.04, 1.]])
+cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

-# In order to convert the upper triangular correlation values to a complete
-# correlation matrix, we need to construct an index matrix:
-n_elem = int(n_var * (n_var - 1) / 2)
-tri_index = np.zeros([n_var, n_var], dtype=int)
-tri_index[np.triu_indices(n_var, k=1)] = np.arange(n_elem)
-tri_index[np.triu_indices(n_var, k=1)[::-1]] = np.arange(n_elem)
+dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

 with pm.Model() as model:

     mu = pm.Normal('mu', mu=0, sd=1, shape=n_var)

-    # We can specify separate priors for sigma and the correlation matrix:
-    sigma = pm.Uniform('sigma', shape=n_var)
-    corr_triangle = pm.LKJCorr('corr', n=1, p=n_var)
-    corr_matrix = corr_triangle[tri_index]
-    corr_matrix = tt.fill_diagonal(corr_matrix, 1)
+    # Note that we access the distribution for the standard
+    # deviations, and do not create a new random variable.
+    sd_dist = pm.HalfCauchy.dist(beta=2.5)
+    packed_chol = pm.LKJCholeskyCov('chol_cov', n=n_var, eta=1, sd_dist=sd_dist)
+    # compute the covariance matrix
+    chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
+    cov = tt.dot(chol, chol.T)

-    cov_matrix = tt.diag(sigma).dot(corr_matrix.dot(tt.diag(sigma)))
+    # Extract the standard deviations and correlations
+    sd = pm.Deterministic('sd', tt.sqrt(tt.diag(cov)))
+    corr = tt.diag(sd**-1).dot(cov.dot(tt.diag(sd**-1)))
+    r = pm.Deterministic('r', corr[np.triu_indices(n_var, k=1)])

-    like = pm.MvNormal('likelihood', mu=mu, cov=cov_matrix, observed=dataset)
+    like = pm.MvNormal('likelihood', mu=mu, chol=chol, observed=dataset)


 def run(n=1000):
     if n == "short":
         n = 50
     with model:
-        start = pm.find_MAP()
-        step = pm.NUTS(scaling=start)
-        trace = pm.sample(n, step=step, start=start)
-    return trace
+        trace = pm.sample(n)
+        pm.traceplot(trace, varnames=['mu', 'r'],
+                     lines={'mu': mu_r, 'r': corr_r[np.triu_indices(n_var, k=1)]})

 if __name__ == '__main__':
     run()
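For readers of the updated example: LKJCholeskyCov returns the n*(n+1)/2 nonzero entries of the Cholesky factor as a flat vector, and pm.expand_packed_triangular rebuilds the matrix. A quick sketch of the assumed row-wise packing, with made-up values:

import numpy as np
import theano.tensor as tt
import pymc3 as pm

packed = tt.as_tensor_variable(np.array([1., 2., 3., 4., 5., 6.]))
chol = pm.expand_packed_triangular(3, packed, lower=True)
print(chol.eval())
# expected lower-triangular layout:
# [[ 1.  0.  0.]
#  [ 2.  3.  0.]
#  [ 4.  5.  6.]]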
diff --git a/pymc3/examples/arbitrary_stochastic.py b/pymc3/examples/arbitrary_stochastic.py
index a342a453e5..74383e2762 100644
--- a/pymc3/examples/arbitrary_stochastic.py
+++ b/pymc3/examples/arbitrary_stochastic.py
@@ -4,23 +4,25 @@


 def build_model():
+    # data
+    failure = np.array([0., 1.])
+    value = np.array([1., 0.])
+
+    # custom log-likelihood
+    def logp(failure, value):
+        return tt.sum(failure * tt.log(lam) - lam * value)
+
+    # model
     with pm.Model() as model:
-        lam = pm.Exponential('lam', 1)
-        failure = np.array([0, 1])
-        value = np.array([1, 0])
-
-        def logp(failure, value):
-            return tt.sum(failure * np.log(lam) - lam * value)
+        lam = pm.Exponential('lam', 1.)
         pm.DensityDist('x', logp, observed={'failure': failure, 'value': value})
     return model


 def run(n_samples=3000):
     model = build_model()
-    start = model.test_point
-    h = pm.find_hessian(start, model=model)
-    step = pm.Metropolis(model.vars, h, blocked=True, model=model)
-    trace = pm.sample(n_samples, step=step, start=start, model=model)
+    with model:
+        trace = pm.sample(n_samples)
     return trace


 if __name__ == "__main__":
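When `observed` is a dict, as above, its keys are matched to the argument names of the custom logp function. With a plain array, the array itself is passed as the single argument; a minimal sketch with invented data:

import numpy as np
import pymc3 as pm
import theano.tensor as tt

data = np.array([0.5, 1.2, -0.3])

def logp(value):
    # standard-normal log-density, up to an additive constant
    return tt.sum(-0.5 * value ** 2)

with pm.Model():
    pm.DensityDist('obs', logp, observed=data)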
diff --git a/pymc3/examples/arma_example.py b/pymc3/examples/arma_example.py
index fc36040260..4c784e1bed 100644
--- a/pymc3/examples/arma_example.py
+++ b/pymc3/examples/arma_example.py
@@ -1,10 +1,10 @@
-from pymc3 import Normal, sample, Model, plots, Potential, variational, HalfCauchy
+import pymc3 as pm
 from theano import scan, shared
 import numpy as np

 """
 ARMA example
-It is interesting to note just how much more compact this is that the original STAN example
+It is interesting to note just how much more compact this is than the original STAN example

 The original implementation is in the STAN documentation by Gelman et al and is reproduced below
@@ -52,11 +52,11 @@ def build_model():
     y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))

-    with Model() as arma_model:
-        sigma = HalfCauchy('sigma', 5)
-        theta = Normal('theta', 0, sd=2)
-        phi = Normal('phi', 0, sd=2)
-        mu = Normal('mu', 0, sd=10)
+    with pm.Model() as arma_model:
+        sigma = pm.HalfCauchy('sigma', 5.)
+        theta = pm.Normal('theta', 0., sd=2.)
+        phi = pm.Normal('phi', 0., sd=2.)
+        mu = pm.Normal('mu', 0., sd=10.)

         err0 = y[0] - (mu + phi * mu)

@@ -69,19 +69,18 @@ def calc_next(last_y, this_y, err, mu, phi, theta):
                   outputs_info=[err0],
                   non_sequences=[mu, phi, theta])

-    Potential('like', Normal.dist(0, sd=sigma).logp(err))
-    variational.advi(n=2000)
+    pm.Potential('like', pm.Normal.dist(0, sd=sigma).logp(err))
     return arma_model


 def run(n_samples=1000):
     model = build_model()
     with model:
-        trace = sample(draws=n_samples)
+        trace = pm.sample(draws=n_samples)

     burn = n_samples // 10
-    plots.traceplot(trace[burn:])
-    plots.forestplot(trace[burn:])
+    pm.plots.traceplot(trace[burn:])
+    pm.plots.forestplot(trace[burn:])


 if __name__ == '__main__':
diff --git a/pymc3/examples/baseball.py b/pymc3/examples/baseball.py
index 2bc93c3961..c924b7f308 100644
--- a/pymc3/examples/baseball.py
+++ b/pymc3/examples/baseball.py
@@ -5,33 +5,34 @@

 import pymc3 as pm
 import numpy as np
-import theano

-data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t", skiprows=1, usecols=(2,3))
-
-atBats = data[:,0].astype(theano.config.floatX)
-hits = data[:,1].astype(theano.config.floatX)
-
-N = len( hits )
-
-model = pm.Model()
-
-# we want to bound the kappa below
-BoundedKappa = pm.Bound( pm.Pareto, lower=1.0 )
-
-with model:
-    phi = pm.Uniform( 'phi', lower=0.0, upper=1.0 )
-    kappa = BoundedKappa( 'kappa', alpha=1.0001, m=1.5 )
-    thetas = pm.Beta( 'thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N )
-    ys = pm.Binomial( 'ys', n=atBats, p=thetas, observed=hits )
-
-def run( n=100000 ):
+def build_model():
+    data = np.loadtxt(pm.get_data('efron-morris-75-data.tsv'), delimiter="\t",
+                      skiprows=1, usecols=(2,3))
+
+    atbats = pm.floatX(data[:,0])
+    hits = pm.floatX(data[:,1])
+
+    N = len(hits)
+
+    # we want to bound the kappa below
+    BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)
+
+    with pm.Model() as model:
+        phi = pm.Uniform('phi', lower=0.0, upper=1.0)
+        kappa = BoundedKappa('kappa', alpha=1.0001, m=1.5)
+        thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
+        ys = pm.Binomial('ys', n=atbats, p=thetas, observed=hits)
+    return model
+
+def run(n=2000):
+    model = build_model()
     with model:
         # initialize NUTS() with ADVI under the hood
-        trace = pm.sample( n )
+        trace = pm.sample(n, nuts_kwargs={'target_accept':.99})
         # drop some first samples as burnin
-        pm.traceplot( trace[1000:] )
+        pm.traceplot(trace[1000:])

 if __name__ == '__main__':
     run()
diff --git a/pymc3/examples/censored_data.py b/pymc3/examples/censored_data.py
index 084be25c93..51c9a0da2e 100644
--- a/pymc3/examples/censored_data.py
+++ b/pymc3/examples/censored_data.py
@@ -38,7 +38,7 @@ def normal_lcdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.lt(z, -1.0),
-        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2,
+        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
     )

@@ -47,7 +47,7 @@ def normal_lccdf(mu, sigma, x):
     z = (x - mu) / sigma
     return tt.switch(
         tt.gt(z, 1.0),
-        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2) - tt.sqr(z) / 2.,
+        tt.log(tt.erfcx(z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
         tt.log1p(-tt.erfc(-z / tt.sqrt(2.)) / 2.)
     )
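The erfcx branches being touched up here exist for numerical stability: tt.log(tt.erfc(...)) underflows to -inf deep in the tail, whereas erfcx(x) = exp(x**2) * erfc(x) keeps everything finite. A quick check of the identity log Phi(z) = log(erfcx(-z / sqrt(2)) / 2) - z**2 / 2, using scipy for illustration:

import numpy as np
from scipy import special, stats

z = -20.
naive = np.log(stats.norm.cdf(z))   # still finite here; hits -inf for z below about -38
stable = np.log(special.erfcx(-z / np.sqrt(2.)) / 2.) - z ** 2 / 2.
print(naive, stable)                # both approximately -203.9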
diff --git a/pymc3/examples/custom_dists.py b/pymc3/examples/custom_dists.py
index 7487d16578..37f5ad80ea 100644
--- a/pymc3/examples/custom_dists.py
+++ b/pymc3/examples/custom_dists.py
@@ -11,7 +11,7 @@
 import matplotlib.pyplot as plt
 import numpy as np

-import pymc3
+import pymc3 as pm
 import theano.tensor as tt

 np.random.seed(42)
@@ -24,20 +24,23 @@
 ydata = np.random.normal(ydata, 10)
 data = {'x': xdata, 'y': ydata}

-with pymc3.Model() as model:
-    alpha = pymc3.Uniform('intercept', -100, 100)
+# define the log-likelihoods outside of the model context, otherwise njobs won't work:
+# lambdas defined in a local namespace are not picklable (see issue #1995)
+def loglike1(value):
+    return -1.5 * tt.log(1 + value**2)
+def loglike2(value):
+    return -tt.log(tt.abs_(value))
+
+with pm.Model() as model:
+    alpha = pm.Normal('intercept', mu=0, sd=100)
     # Create custom densities
-    beta = pymc3.DensityDist('slope', lambda value:
-                             - 1.5 * tt.log(1 + value**2), testval=0)
-    sigma = pymc3.DensityDist(
-        'sigma', lambda value: -tt.log(tt.abs_(value)), testval=1)
+    beta = pm.DensityDist('slope', loglike1, testval=0)
+    sigma = pm.DensityDist('sigma', loglike2, testval=1)
     # Create likelihood
-    like = pymc3.Normal('y_est', mu=alpha + beta *
+    like = pm.Normal('y_est', mu=alpha + beta *
                      xdata, sd=sigma, observed=ydata)

-    start = pymc3.find_MAP()
-    step = pymc3.NUTS(scaling=start)  # Instantiate sampler
-    trace = pymc3.sample(10000, step, start=start)
+    trace = pm.sample(2000, njobs=2)

 #################################################
diff --git a/pymc3/examples/disaster_model_arbitrary_deterministic.py b/pymc3/examples/disaster_model_arbitrary_deterministic.py
index 95701808ae..08d43d6be1 100644
--- a/pymc3/examples/disaster_model_arbitrary_deterministic.py
+++ b/pymc3/examples/disaster_model_arbitrary_deterministic.py
@@ -7,8 +7,7 @@

 import pymc3 as pm
 import theano.tensor as tt
-from theano import as_op
-from numpy import arange, array, empty
+from numpy import arange, array

 __all__ = ['disasters_data', 'switchpoint', 'early_mean', 'late_mean',
            'rate', 'disasters']
@@ -23,18 +22,6 @@
                            0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
 years = len(disasters_data)

-# here is the trick
-
-
-@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
-def rateFunc(switchpoint, early_mean, late_mean):
-    ''' Concatenate Poisson means '''
-    out = empty(years)
-    out[:switchpoint] = early_mean
-    out[switchpoint:] = late_mean
-    return out
-
-
 with pm.Model() as model:

     # Prior for distribution of switchpoint location
@@ -46,23 +33,18 @@ def rateFunc(switchpoint, early_mean, late_mean):
     # Allocate appropriate Poisson rates to years before and after current
     # switchpoint location
     idx = arange(years)
-    # theano style:
-    # rate = switch(switchpoint >= idx, early_mean, late_mean)
-    # non-theano style
-    rate = rateFunc(switchpoint, early_mean, late_mean)
-
+    rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
+
     # Data likelihood
     disasters = pm.Poisson('disasters', rate, observed=disasters_data)

-    # Initial values for stochastic nodes
-    start = {'early_mean': 2., 'late_mean': 3.}
-
     # Use slice sampler for means
     step1 = pm.Slice([early_mean, late_mean])

     # Use Metropolis for switchpoint, since it accomodates discrete variables
     step2 = pm.Metropolis([switchpoint])
-
-    # njobs>1 works only with most recent (mid August 2014) Theano version:
-    # https://github.com/Theano/Theano/pull/2021
-    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=1)
+
+    # Initial values for stochastic nodes
+    start = {'early_mean': 2., 'late_mean': 3.}
+
+    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)

     pm.traceplot(tr)
diff --git a/pymc3/examples/factor_potential.py b/pymc3/examples/factor_potential.py
index 6074179afe..ccb1c9251c 100644
--- a/pymc3/examples/factor_potential.py
+++ b/pymc3/examples/factor_potential.py
@@ -1,19 +1,24 @@
 import pymc3 as pm

-with pm.Model() as model:
-    x = pm.Normal('x', 1, 1)
-    x2 = pm.Potential('x2', -x ** 2)
+"""
+You can add an arbitrary factor potential to the model likelihood using
+pm.Potential. For example, you can add a Jacobian adjustment using pm.Potential
+when you reparameterize a model. It is similar to `increment_log_prob` in
+STAN.
+"""

-    start = model.test_point
-    h = pm.find_hessian(start)
-    step = pm.Metropolis(model.vars, h)
+def build_model():
+    with pm.Model() as model:
+        x = pm.Normal('x', 1, 1)
+        x2 = pm.Potential('x2', -x ** 2)
+    return model

-
-def run(n=3000):
+def run(n=1000):
+    model = build_model()
     if n == "short":
         n = 50
     with model:
-        pm.sample(n, step=step, start=start)
+        pm.sample(n)

 if __name__ == '__main__':
     run()
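As a concrete instance of the Jacobian adjustment the new docstring mentions, here is a minimal sketch (model invented for illustration): to sample x > 0 with density p(x) = exp(-x) via the unconstrained variable y = log(x), add the target log-density plus the log-Jacobian log|dx/dy| = y as a potential:

import pymc3 as pm
import theano.tensor as tt

with pm.Model():
    y = pm.Flat('y')                      # unconstrained parameter
    x = pm.Deterministic('x', tt.exp(y))  # x = exp(y) > 0
    # log p(x) = -x, plus the log-Jacobian of the transform, log|dx/dy| = y
    pm.Potential('x_density', -x + y)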
diff --git a/pymc3/examples/garch_example.py b/pymc3/examples/garch_example.py
index 4b7589d18e..bfb0e44dbd 100644
--- a/pymc3/examples/garch_example.py
+++ b/pymc3/examples/garch_example.py
@@ -38,13 +38,10 @@ def get_garch_model():
     shape = r.shape

     with Model() as garch:
-        alpha1 = Normal('alpha1', mu=np.zeros(shape=shape), sd=np.ones(shape=shape), shape=shape)
+        alpha1 = Normal('alpha1', mu=0., sd=1., shape=shape)
         BoundedNormal = Bound(Normal, upper=(1 - alpha1))
-        beta1 = BoundedNormal('beta1',
-                              mu=np.zeros(shape=shape),
-                              sd=1e6 * np.ones(shape=shape),
-                              shape=shape)
-        mu = Normal('mu', mu=np.zeros(shape=shape), sd=1e6 * np.ones(shape=shape), shape=shape)
+        beta1 = BoundedNormal('beta1', mu=0., sd=1e6, shape=shape)
+        mu = Normal('mu', mu=0., sd=1e6, shape=shape)
         theta = tt.sqrt(alpha0 + alpha1 * tt.pow(r - mu, 2) +
                         beta1 * tt.pow(sigma1, 2))
         Normal('obs', mu, sd=theta, observed=r)
@@ -55,7 +52,7 @@ def run(n=1000):
     if n == "short":
         n = 50
     with get_garch_model():
-        tr = sample(n, n_init=10000)
+        tr = sample(n, init=None)
     return tr
diff --git a/pymc3/examples/lightspeed_example.py b/pymc3/examples/lightspeed_example.py
index 3dd27f14f5..23c09f728b 100644
--- a/pymc3/examples/lightspeed_example.py
+++ b/pymc3/examples/lightspeed_example.py
@@ -23,21 +23,10 @@
     # define likelihood
     y_obs = pm.Normal('Y_obs', mu=mu, sd=sigma, observed=light_speed)

-    # inference fitting the model
-
-    # I have to use slice because the following command
-    # trace = pm.sample(5000)
-    # produce the error
-    # ValueError: Cannot construct a ufunc with more than 32 operands
-    # (requested number were: inputs = 51 and outputs = 1)valueerror
-

 def run(n=5000):
     with model_1:
-        xstart = pm.find_MAP()
-        xstep = pm.Slice()
-        trace = pm.sample(5000, xstep, start=xstart,
-                          random_seed=123, progressbar=True)
+        trace = pm.sample(n)

     pm.summary(trace)
diff --git a/pymc3/examples/simpletest.py b/pymc3/examples/simpletest.py
index 1a3bd229da..d67f63176b 100644
--- a/pymc3/examples/simpletest.py
+++ b/pymc3/examples/simpletest.py
@@ -1,4 +1,3 @@
-import matplotlib.pyplot as plt
 import pymc3 as pm
 import numpy as np

@@ -9,31 +8,18 @@

 data = np.random.normal(size=(2, 20))

-model = pm.Model()
-
-with model:
-    x = pm.Normal('x', mu=.5, tau=2. ** -2, shape=(2, 1))
+with pm.Model() as model:
+    x = pm.Normal('x', mu=.5, sd=2., shape=(2, 1))
     z = pm.Beta('z', alpha=10, beta=5.5)
-    d = pm.Normal('data', mu=x, tau=.75 ** -2, observed=data)
-    step = pm.NUTS()
+    d = pm.Normal('data', mu=x, sd=.75, observed=data)


 def run(n=1000):
     if n == "short":
         n = 50
     with model:
-        trace = pm.sample(n, step)
-
-        plt.subplot(2, 2, 1)
-        plt.plot(trace[x][:, 0, 0])
-        plt.subplot(2, 2, 2)
-        plt.hist(trace[x][:, 0, 0])
-
-        plt.subplot(2, 2, 3)
-        plt.plot(trace[x][:, 1, 0])
-        plt.subplot(2, 2, 4)
-        plt.hist(trace[x][:, 1, 0])
-        plt.show()
+        trace = pm.sample(n)
+        pm.traceplot(trace, varnames=['x'])

 if __name__ == '__main__':
     run()
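A pattern running through all of these examples: the manual find_MAP / step-method boilerplate is replaced by a bare pm.sample call, which assigns step methods and initializes NUTS automatically. A minimal before/after sketch on an invented toy model:

import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', 0., sd=1.)
    pm.Normal('obs', mu=mu, sd=1., observed=[0.1, -0.3, 0.2])

    # old pattern, shown only for comparison:
    # start = pm.find_MAP()
    # step = pm.NUTS(scaling=start)
    # trace = pm.sample(1000, step=step, start=start)

    # new pattern: automatic initialization and step assignment
    trace = pm.sample(1000)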