#!/usr/bin/env python
# coding: utf-8

# ## Chapter 5 - Bayes' Rule

# In[1]:


import numpy as np
import matplotlib.pyplot as plt

# note: on matplotlib >= 3.6 this style name was renamed 'seaborn-v0_8-white'
plt.style.use('seaborn-white')
color = '#87ceeb'


# In[2]:


# helper function to evaluate an (optionally truncated) unnormalized normal
# density on a grid of nvals points between lbound and ubound
def normal(mu, sigma=.75, nvals=11, lbound=0, ubound=1):
    x = np.linspace(lbound, ubound, nvals)
    return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sigma, 2.)))


# ### Set up

# Here, we create a vector of values that $\theta$ can take on. This is the "grid" laid down on the parameter space. It's a simple grid (only 1 dimension), but that's why what follows is appropriately called a grid approximation. By tweaking `n_theta_vals`, this grid can be coarse-grained (small values of `n_theta_vals`) or very fine-grained (large values of `n_theta_vals`). Start with small values to get an understanding of how the approximation works, then crank it up to approach an exact solution.

# In[3]:


n_theta_vals = 11
#n_theta_vals = 1001

theta = np.linspace(0, 1, n_theta_vals)


# ### Prior

# `p_theta` is the vector of prior probabilities, each associated with the corresponding value of $\theta$ that we just created. There are several alternative priors specified here:
#
# - Uniform: specifies a belief that all values of $\theta$ are equally credible
# - Triangle: values of $\theta$ near 0.5 are more credible, with credibility steadily declining as $\theta$ moves away from 0.5
# - (Truncated) Normal: a conventional normal distribution, truncated to the permissible range of $\theta$ values (i.e., 0-1)
#
# Try out each of them. Tweak the mean and standard deviation of the normal distribution to see how they influence the posterior.

# In[4]:


# uniform prior
p_theta = np.ones_like(theta)

# triangle prior
#p_theta = np.minimum(theta, 1-theta)

# (truncated) normal priors
# wide prior
#p_theta = normal(.5, 10, n_theta_vals)
# narrow prior
#p_theta = normal(.5, .04, n_theta_vals)

# normalize the vector so that priors sum to 1 (making them proper probabilities)
p_theta = p_theta / np.sum(p_theta)


# ### Data

# This constructs a set of flip outcomes. Specify the number of heads (i.e., `n_heads`) and the number of tails (i.e., `n_tails`). There are three scenarios prepared:
#
# 1. 1 flip that comes up heads
# 2. 4 flips, 1 of which comes up heads (25% heads)
# 3. 40 flips, 10 of which come up heads (25% heads)

# In[5]:


# example 1
n_heads = 1
n_tails = 0

# example 2
#n_heads = 1
#n_tails = 3

# example 3
#n_heads = 10
#n_tails = 30

data = np.repeat([1, 0], [n_heads, n_tails])


# ### Likelihood

# Here, we calculate the likelihood of the data we just constructed (i.e., $p(data|\theta)$). We use a Bernoulli likelihood, applied to each of the values of $\theta$ we are entertaining. We then multiply each likelihood by the prior associated with the corresponding value of $\theta$ and sum all of these values to arrive at what is sometimes called the _evidence_ (i.e., $p(data)$, or the denominator of Bayes' rule).

# In[6]:


p_data_given_theta = (theta**n_heads) * ((1-theta)**n_tails)

# calculate the evidence (P(D), the prior probability of the data)
p_data = np.sum(p_data_given_theta * p_theta)


# ### Inference

# Here, we put it all together by applying Bayes' rule,
#
# $$p(\theta|D) = \frac{p(D|\theta)\,p(\theta)}{p(D)},$$
#
# to each prior/likelihood pair to generate a corresponding posterior for each value of $\theta$ we are entertaining.

# In[7]:


p_theta_given_data = p_data_given_theta * p_theta / p_data
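# As a quick check of the grid approximation (this cell is a sketch that was not part of the original notebook): if the uniform prior above is the active one, the exact posterior is a Beta(1 + `n_heads`, 1 + `n_tails`) distribution, so the posterior mean has a closed form we can compare against. The names `grid_mean` and `exact_mean` are introduced here for illustration.

# In[ ]:


# posterior mean implied by the grid approximation
grid_mean = np.sum(theta * p_theta_given_data)

# exact posterior mean under a uniform (i.e., Beta(1, 1)) prior
exact_mean = (1 + n_heads) / (2 + n_heads + n_tails)

print('grid mean: %g, exact mean: %g' % (grid_mean, exact_mean))

# With `n_theta_vals = 11` the two means differ noticeably; with 1001 grid points they agree much more closely, which is the sense in which a finer grid approaches the exact solution.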
# ### Visualize

# Plot the prior, the likelihood, and the posterior.

# In[8]:


fig = plt.figure(figsize=(8, 8))
plt.subplots_adjust(hspace=1.1)

# Plot the prior, the likelihood, and the posterior:
for i, dist in enumerate([p_theta, p_data_given_theta, p_theta_given_data]):
    plt.subplot(3, 1, i+1)
    # the deprecated use_line_collection argument has been dropped; newer
    # matplotlib always draws the stems as a LineCollection
    markerline, stemlines, baseline = plt.stem(theta, dist, markerfmt=' ', basefmt=' ')
    plt.setp(stemlines, 'linewidth', 3)
    plt.xlim(0, 1)
    plt.xlabel(r'$\theta$')

# prior (plt.sca replaces the deprecated plt.axes(ax) for selecting an axes)
plt.sca(fig.axes[0])
plt.ylabel(r'$P(\theta)$')
plt.title('Prior', weight='bold')

# likelihood
plt.sca(fig.axes[1])
plt.ylabel(r'$P(D|\theta)$')
plt.title('Likelihood', weight='bold')
plt.text(0.1, np.max(p_data_given_theta)/2, 'D = %sH,%sT' % (n_heads, n_tails))

# posterior
plt.sca(fig.axes[2])
plt.ylabel(r'$P(\theta|D)$')
plt.title('Posterior', weight='bold')
plt.text(0.1, np.max(p_theta_given_data)/2, 'P(D) = %g' % p_data)

plt.show()
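# A natural follow-up (again a sketch, not part of the original notebook) is to summarize the posterior numerically. Because the posterior lives on a discrete grid, a central 95% credible interval can be read directly off its cumulative sum; `np.searchsorted` finds the first grid point at which each tail probability is reached. On a coarse grid the endpoints are only as precise as the grid spacing.

# In[ ]:


# cumulative posterior probability across the grid
posterior_cdf = np.cumsum(p_theta_given_data)

# first grid points at which 2.5% and 97.5% of the posterior mass is accumulated
lower = theta[np.searchsorted(posterior_cdf, 0.025)]
upper = theta[np.searchsorted(posterior_cdf, 0.975)]

print('central 95%% credible interval: [%g, %g]' % (lower, upper))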