#!/usr/bin/env python
# coding: utf-8

# ## Chapter 18 - Metric Predicted Variable with Multiple Metric Predictors
# - [18.1 - Multiple Linear Regression](#18.1---Multiple-Linear-Regression)
#     - [18.1.4 - Redundant predictors](#18.1.4---Redundant-predictors)
# - [18.2 - Multiplicative Interaction of Metric Predictors](#18.2---Multiplicative-Interaction-of-Metric-Predictors)
# - [18.3 - Shrinkage of Regression Coefficients](#18.3---Shrinkage-of-Regression-Coefficients)

# In[1]:

# %load ../../standard_import.txt
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec
from IPython.display import Image

get_ipython().run_line_magic('matplotlib', 'inline')
# 'seaborn-white' was renamed in matplotlib 3.6; use the versioned style name
plt.style.use('seaborn-v0_8-white')
color = '#87ceeb'
f_dict = {'size': 16}

# In[2]:

get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', '-p matplotlib,numpy,pandas,pymc,seaborn')

# ### 18.1 - Multiple Linear Regression

# #### Data

# In[3]:

df = pd.read_csv('data/Guber1999data.csv')
df.info()

# In[4]:

df.head()

# In[5]:

X = df[['Spend', 'PrcntTake']]
y = df['SATT']

# Standardize the predictors and the predicted variable
meanx = X.mean().to_numpy()
scalex = X.std().to_numpy()
zX = ((X - meanx) / scalex).to_numpy()

meany = y.mean()
scaley = y.std()
zy = ((y - meany) / scaley).to_numpy()

# #### Model (Kruschke, 2015)

# In[6]:

Image('images/fig18_4.png', width=400)

# In[7]:

with pm.Model() as model:
    zbeta0 = pm.Normal('zbeta0', mu=0, sigma=2)
    # A single vector-valued prior; equivalent to declaring separate
    # zbeta1 and zbeta2 scalars and summing the two products.
    zbetaj = pm.Normal('zbetaj', mu=0, sigma=2, shape=2)
    zmu = zbeta0 + pm.math.dot(zbetaj, zX.T)

    nu = pm.Exponential('nu', 1/29.)
    zsigma = pm.Uniform('zsigma', 10**-5, 10)

    likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy)

# In[8]:

with model:
    idata = pm.sample(10000, chains=4, target_accept=0.9)

# In[9]:

with model:
    az.plot_trace(idata);

# #### Figure 18.5

# In[10]:

# Transform parameters back to original scale
beta0 = (idata.posterior['zbeta0']*scaley + meany
         - (idata.posterior['zbetaj']*meanx/scalex).sum(axis=2)*scaley)
betaj = (idata.posterior['zbetaj']/scalex)*scaley
scale = idata.posterior['zsigma']*scaley

intercept = beta0
spend = betaj.sel(zbetaj_dim_0=0)
prcnttake = betaj.sel(zbetaj_dim_0=1)
normality = idata.posterior['nu']

fig, ([ax1, ax2, ax3], [ax4, ax5, ax6]) = plt.subplots(2, 3, figsize=(12, 6))

for ax, estimate, title, xlabel in zip(fig.axes,
                                       [intercept, spend, prcnttake, scale, normality],
                                       ['Intercept', 'Spend', 'PrcntTake', 'Scale', 'Normality'],
                                       [r'$\beta_0$', r'$\beta_1$', r'$\beta_2$', r'$\sigma$', r'$\nu$']):
    az.plot_posterior(estimate, point_estimate='mode', ax=ax, color=color)
    ax.set_title(title, fontdict=f_dict)
    ax.set_xlabel(xlabel, fontdict=f_dict)
fig.tight_layout()
ax6.set_visible(False)
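# The back-transformation above follows directly from the standardization: with
# $z_{x_j} = (x_j - \bar{x}_j)/s_{x_j}$ and $z_y = (y - \bar{y})/s_y$, the
# standardized coefficients $\zeta_0$ (`zbeta0`) and $\zeta_j$ (`zbetaj`)
# convert to the original scale as
#
# $$\beta_j = \zeta_j \frac{s_y}{s_{x_j}}, \qquad
# \beta_0 = \zeta_0\, s_y + \bar{y} - \sum_j \zeta_j \frac{\bar{x}_j\, s_y}{s_{x_j}}.$$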
# Below we create the scatterplots of figure 18.5 using `pairplot()` in seaborn
# and then tweak the lower triangle.

# In[11]:

# DataFrame with the columns in the correct order
pair_plt = pd.DataFrame({'Intercept': intercept.values.flatten(),
                         'Spend': spend.values.flatten(),
                         'PrcntTake': prcnttake.values.flatten(),
                         'Scale': scale.values.flatten(),
                         'Normality': normality.values.flatten()},
                        columns=['Intercept', 'Spend', 'PrcntTake', 'Scale', 'Normality'])

# Correlation coefficients
corr = np.round(np.corrcoef(pair_plt, rowvar=False), decimals=3)
# Indexes of the lower triangle, below the diagonal
lower_idx = np.tril(corr, -1).nonzero()

# The seaborn pairplot
pgrid = sns.pairplot(pair_plt, plot_kws={'edgecolor': color, 'facecolor': 'none'})

# Replace the plots on the diagonal with the parameter names
for i, ax in enumerate(pgrid.diag_axes):
    ax.clear()
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.text(.5, .5, pair_plt.columns[i], transform=ax.transAxes,
            fontdict={'size': 16, 'weight': 'bold'}, ha='center')

# Replace the lower triangle with the correlation coefficients
for i, ax in enumerate(pgrid.axes[lower_idx]):
    ax.collections[0].remove()
    ax.xaxis.set_visible(False)
    ax.text(.5, .5, corr[lower_idx][i], transform=ax.transAxes,
            fontdict=f_dict, ha='center')

# ### 18.1.4 - Redundant predictors

# In[12]:

# Add a predictor that is perfectly (negatively) correlated with PrcntTake
X2 = X.assign(PropNotTake=(100 - df['PrcntTake']) / 100)

meanx2 = X2.mean().to_numpy()
scalex2 = X2.std().to_numpy()
zX2 = ((X2 - meanx2) / scalex2).to_numpy()

# In[13]:

sns.pairplot(X2, plot_kws={'edgecolor': color, 'facecolor': 'none'})

# In[14]:

with pm.Model() as model2:
    zbeta0 = pm.Normal('zbeta0', mu=0, sigma=2)
    zbetaj = pm.Normal('zbetaj', mu=0, sigma=2, shape=3)
    zmu = zbeta0 + pm.math.dot(zbetaj, zX2.T)

    nu = pm.Exponential('nu', 1/29.)
    zsigma = pm.Uniform('zsigma', 10**-5, 10)

    likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy)

# In[15]:

with model2:
    idata2 = pm.sample(5000, chains=4, target_accept=0.9)

# In[16]:

with model2:
    az.plot_trace(idata2);

# **Note**: The trace/histogram above for `zbetaj` may look as if one of the
# chains got stuck (i.e., all of its values piled up just above zero). Looking
# closely at the plot, however, you can tell that it is not a *chain* that is
# piled up above zero; it is one of the parameters in the *array* stored in
# `zbetaj`. Specifically, it is the first parameter, the coefficient of the
# *Spend* variable.
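# One way to verify this (a quick check, not in the original notebook) is to
# summarize each component of `zbetaj` separately: the first row, the *Spend*
# coefficient, is the one concentrated just above zero in all chains.

# In[ ]:

az.summary(idata2, var_names=['zbetaj'])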
# #### Figure 18.6

# In[17]:

# Transform parameters back to original scale
beta0 = (idata2.posterior['zbeta0']*scaley + meany
         - (idata2.posterior['zbetaj']*meanx2/scalex2).sum(axis=2)*scaley)
betaj = (idata2.posterior['zbetaj']/scalex2)*scaley
scale = idata2.posterior['zsigma']*scaley

intercept = beta0
spend = betaj.sel(zbetaj_dim_0=0)
prcnttake = betaj.sel(zbetaj_dim_0=1)
propnottake = betaj.sel(zbetaj_dim_0=2)
normality = idata2.posterior['nu']

fig, ([ax1, ax2, ax3], [ax4, ax5, ax6]) = plt.subplots(2, 3, figsize=(12, 6))

for ax, estimate, title, xlabel in zip(fig.axes,
                                       [intercept, spend, prcnttake, propnottake, scale, normality],
                                       ['Intercept', 'Spend', 'PrcntTake', 'PropNotTake', 'Scale', 'Normality'],
                                       [r'$\beta_0$', r'$\beta_1$', r'$\beta_2$', r'$\beta_3$', r'$\sigma$', r'$\nu$']):
    az.plot_posterior(estimate, point_estimate='mode', ax=ax, color=color)
    ax.set_title(title, fontdict=f_dict)
    ax.set_xlabel(xlabel, fontdict=f_dict)
fig.tight_layout()

# In[18]:

# DataFrame with the columns in the correct order
pair_plt = pd.DataFrame({'Intercept': intercept.values.flatten(),
                         'Spend': spend.values.flatten(),
                         'PrcntTake': prcnttake.values.flatten(),
                         'PropNotTake': propnottake.values.flatten(),
                         'Scale': scale.values.flatten(),
                         'Normality': normality.values.flatten()},
                        columns=['Intercept', 'Spend', 'PrcntTake', 'PropNotTake', 'Scale', 'Normality'])

# Correlation coefficients
corr = np.round(np.corrcoef(pair_plt, rowvar=False), decimals=3)
# Indexes of the lower triangle, below the diagonal
lower_idx = np.tril(corr, -1).nonzero()

# The seaborn pairplot
pgrid = sns.pairplot(pair_plt, plot_kws={'edgecolor': color, 'facecolor': 'none'})

# Replace the plots on the diagonal with the parameter names
for i, ax in enumerate(pgrid.diag_axes):
    ax.clear()
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.text(.5, .5, pair_plt.columns[i], transform=ax.transAxes,
            fontdict={'size': 16, 'weight': 'bold'}, ha='center')

# Replace the lower triangle with the correlation coefficients
for i, ax in enumerate(pgrid.axes[lower_idx]):
    ax.collections[0].remove()
    ax.xaxis.set_visible(False)
    ax.text(.5, .5, corr[lower_idx][i], transform=ax.transAxes,
            fontdict=f_dict, ha='center')

# ### 18.2 - Multiplicative Interaction of Metric Predictors

# In[19]:

# Add the multiplicative interaction of Spend and PrcntTake as a third predictor
X3 = X.assign(SpendXPrcnt=lambda x: x.Spend * x.PrcntTake)

meanx3 = X3.mean().to_numpy()
scalex3 = X3.std().to_numpy()
zX3 = ((X3 - meanx3) / scalex3).to_numpy()

# In[20]:

# Correlation matrix
X3.corr()
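# The product term is highly correlated with both of its component predictors.
# As an aside (a sketch, not in the original notebook): mean-centering the
# predictors before forming the product greatly reduces this correlation, which
# typically eases sampling. The analysis below follows the book and keeps the
# uncentered product.

# In[ ]:

Xc = X - X.mean()                                    # mean-center the predictors
Xc = Xc.assign(SpendXPrcnt=Xc.Spend * Xc.PrcntTake)  # product of centered columns
Xc.corr()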
# In[21]:

with pm.Model() as model3:
    zbeta0 = pm.Normal('zbeta0', mu=0, sigma=2)
    zbetaj = pm.Normal('zbetaj', mu=0, sigma=2, shape=3)
    zmu = zbeta0 + pm.math.dot(zbetaj, zX3.T)

    nu = pm.Exponential('nu', 1/29.)
    zsigma = pm.Uniform('zsigma', 10**-5, 10)

    likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy)

# In[22]:

with model3:
    idata3 = pm.sample(20000, chains=4, target_accept=0.9)

# #### Figure 18.9

# In[23]:

# Transform parameters back to original scale
beta0 = (idata3.posterior['zbeta0']*scaley + meany
         - (idata3.posterior['zbetaj']*meanx3/scalex3).sum(axis=2)*scaley)
betaj = (idata3.posterior['zbetaj']/scalex3)*scaley
scale = idata3.posterior['zsigma']*scaley

intercept = beta0
spend = betaj.sel(zbetaj_dim_0=0)
prcnttake = betaj.sel(zbetaj_dim_0=1)
spendxprcnt = betaj.sel(zbetaj_dim_0=2)
normality = idata3.posterior['nu']

fig, ([ax1, ax2, ax3], [ax4, ax5, ax6]) = plt.subplots(2, 3, figsize=(12, 6))

for ax, estimate, title, xlabel in zip(fig.axes,
                                       [intercept, spend, prcnttake, spendxprcnt, scale, normality],
                                       ['Intercept', 'Spend', 'PrcntTake', 'SpendXPrcnt', 'Scale', 'Normality'],
                                       [r'$\beta_0$', r'$\beta_1$', r'$\beta_2$', r'$\beta_3$', r'$\sigma$', r'$\nu$']):
    az.plot_posterior(estimate, point_estimate='mode', ax=ax, color=color)
    ax.set_title(title, fontdict=f_dict)
    ax.set_xlabel(xlabel, fontdict=f_dict)
fig.tight_layout()

# In[24]:

# Posterior of the interaction coefficient on the original scale
spendxprcnt

# In[25]:

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

# Slope on Spend as a function of PrcntTake: beta_1 + beta_3 * PrcntTake
prcnttake_values = np.linspace(4, 80, 20)
spend_slope = spend.values.flatten() + spendxprcnt.values.flatten()*prcnttake_values.reshape(-1, 1)
hpds = np.array([az.hdi(spend_slope[i, :]) for i in range(len(prcnttake_values))])
spend_slope_means = spend_slope.mean(axis=1)

ax1.errorbar(prcnttake_values, spend_slope_means,
             yerr=[spend_slope_means - hpds[:, 0], hpds[:, 1] - spend_slope_means],
             color=color, linestyle='None', marker='o', markersize=10)
ax1.axhline(linestyle='dotted', color='k')
ax1.set_xlim(0, 82)
ax1.set(xlabel='Value of PrcntTake', ylabel='Slope on Spend')

# Slope on PrcntTake as a function of Spend: beta_2 + beta_3 * Spend
spend_values = np.linspace(3.75, 9.75, 20)
prcnttake_slope = prcnttake.values.flatten() + spendxprcnt.values.flatten()*spend_values.reshape(-1, 1)
hpds2 = np.array([az.hdi(prcnttake_slope[i, :]) for i in range(len(spend_values))])
prcnttake_slope_means = prcnttake_slope.mean(axis=1)

ax2.errorbar(spend_values, prcnttake_slope_means,
             yerr=[prcnttake_slope_means - hpds2[:, 0], hpds2[:, 1] - prcnttake_slope_means],
             color=color, linestyle='None', marker='o', markersize=10)
ax2.set_xlim(3.5, 10)
ax2.set(xlabel='Value of Spend', ylabel='Slope on PrcntTake');

# ### 18.3 - Shrinkage of Regression Coefficients

# In *`R`* I ran the first 46 lines of code in the script
# `Jags-Ymet-XmetMulti-MrobustShrink-Example.R` to generate exactly the same
# data used in the book, and exported the resulting data frame `myData` to a
# csv file.
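# For reference, a rough Python equivalent of that generation step (a sketch:
# it assumes the 12 extra predictors are independent standard-normal draws;
# the seed and the `xRand*` column names here are hypothetical, so the values
# would not match the book's, which is why the csv exported from R is read below).

# In[ ]:

rng = np.random.default_rng(12345)  # hypothetical seed, not the book's
rand_cols = pd.DataFrame(rng.standard_normal((len(df), 12)),
                         columns=[f'xRand{i+1}' for i in range(12)])
df_shrink_alt = pd.concat([df, rand_cols], axis=1)  # not used further below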
# In[26]:

df_shrink = pd.read_csv('data/18_3shrinkage.csv')
df_shrink.info()

# In[27]:

# Check the bivariate associations between SATT and the other variables
df_shrink.corrwith(df_shrink['SATT'])

# In[28]:

# Select the predictor columns: Spend, PrcntTake and the 12 randomly generated predictors
X4 = df_shrink.drop(columns=['State', 'StuTeaRat', 'Salary', 'SATV', 'SATM', 'SATT'])
y4 = df_shrink.SATT

meanx4 = X4.mean().to_numpy()
scalex4 = X4.std().to_numpy()
zX4 = ((X4 - meanx4) / scalex4).to_numpy()

meany4 = y4.mean()
scaley4 = y4.std()
zy4 = ((y4 - meany4) / scaley4).to_numpy()

# ##### Hierarchical model with fixed, independent, vague normal priors

# #### Model (Kruschke, 2015)

# In[29]:

Image('images/fig18_4.png', width=400)

# In[30]:

with pm.Model() as model4:
    zbeta0 = pm.Normal('zbeta0', mu=0, sigma=2)
    zbetaj = pm.Normal('zbetaj', mu=0, sigma=2, shape=zX4.shape[1])
    zmu = zbeta0 + pm.math.dot(zbetaj, zX4.T)

    nu = pm.Exponential('nu', 1/29.)
    zsigma = pm.Uniform('zsigma', 10**-5, 10)

    likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy4)

# In[31]:

with model4:
    idata4 = pm.sample(5000, chains=4, target_accept=0.9)

# In[32]:

with model4:
    az.plot_trace(idata4);

# In[33]:

# Probability that the Spend coefficient is greater than zero
(idata4.posterior['zbetaj'].sel(zbetaj_dim_0=0) > 0).mean()

# #### Figure 18.11

# In[34]:

# Transform parameters back to original scale
beta0 = (idata4.posterior['zbeta0']*scaley4 + meany4
         - (idata4.posterior['zbetaj']*meanx4/scalex4).sum(axis=2)*scaley4)
betaj = (idata4.posterior['zbetaj']/scalex4)*scaley4
scale = idata4.posterior['zsigma']*scaley4

intercept = beta0
spend = betaj.sel(zbetaj_dim_0=0)
prcnttake = betaj.sel(zbetaj_dim_0=1)
normality = idata4.posterior['nu']

fig, axes = plt.subplots(4, 3, figsize=(12, 10))

# Intercept
az.plot_posterior(intercept, point_estimate='mode', ax=axes.flatten()[0], color=color)
axes.flatten()[0].set_title('Intercept', fontdict=f_dict)
# Spend & PrcntTake
az.plot_posterior(spend, point_estimate='mode', ax=axes.flatten()[1], color=color)
axes.flatten()[1].set_title('Spend', fontdict=f_dict)
az.plot_posterior(prcnttake, point_estimate='mode', ax=axes.flatten()[2], color=color)
axes.flatten()[2].set_title('PrcntTake', fontdict=f_dict)
# A subset of the randomly generated predictors
for i, j in enumerate([2, 3, 4, 11, 12, 13]):
    az.plot_posterior(betaj.sel(zbetaj_dim_0=j), point_estimate='mode',
                      ax=axes.flatten()[i+3], color=color)
    axes.flatten()[i+3].set_title(X4.columns[j], fontdict=f_dict)
# Scale
az.plot_posterior(scale, point_estimate='mode', ax=axes.flatten()[9], color=color)
axes.flatten()[9].set_title(r'$\sigma$', fontdict=f_dict)
# Normality
az.plot_posterior(normality, point_estimate='mode', ax=axes.flatten()[10], color=color)
axes.flatten()[10].set_title(r'$\nu$', fontdict=f_dict)

axes.flatten()[11].set_visible(False)
fig.tight_layout()
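# ##### Hierarchical model with t-distributed priors on the regression coefficients

# In the model below the standardized coefficients share a common scale
# `zbetajscale`, which is given a gamma prior, and each coefficient gets a
# heavy-tailed Student-t prior ($\nu=1$, i.e. a Cauchy) with that shared scale.
# Small, noisy coefficients are thereby shrunk toward zero, while credibly
# large coefficients can still escape the shrinkage.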
# In[35]:

with pm.Model() as model5:
    zbeta0 = pm.Normal('zbeta0', mu=0, sigma=2)
    zbetajscale = pm.Gamma('zbetajscale', mu=1, sigma=1)
    zbetaj = pm.StudentT('zbetaj', mu=0, sigma=zbetajscale, nu=1, shape=zX4.shape[1])
    zmu = zbeta0 + pm.math.dot(zbetaj, zX4.T)

    nu = pm.Exponential('nu', 1/29.)
    zsigma = pm.Uniform('zsigma', 10**-5, 10)

    likelihood = pm.StudentT('likelihood', nu=nu, mu=zmu, lam=1/zsigma**2, observed=zy4)

# In[36]:

with model5:
    idata5 = pm.sample(10000, chains=4, target_accept=0.9)

# In[37]:

# Probability that the Spend coefficient is greater than zero
(idata5.posterior['zbetaj'].sel(zbetaj_dim_0=0) > 0).mean()

# #### Figure 18.12

# In[38]:

# Transform parameters back to original scale
beta0 = (idata5.posterior['zbeta0']*scaley4 + meany4
         - (idata5.posterior['zbetaj']*meanx4/scalex4).sum(axis=2)*scaley4)
betaj = (idata5.posterior['zbetaj']/scalex4)*scaley4
scale = idata5.posterior['zsigma']*scaley4

intercept = beta0
spend = betaj.sel(zbetaj_dim_0=0)
prcnttake = betaj.sel(zbetaj_dim_0=1)
normality = idata5.posterior['nu']

fig, axes = plt.subplots(4, 3, figsize=(12, 10))

# Intercept
az.plot_posterior(intercept, point_estimate='mode', ax=axes.flatten()[0], color=color)
axes.flatten()[0].set_title('Intercept', fontdict=f_dict)
# Spend & PrcntTake
az.plot_posterior(spend, point_estimate='mode', ax=axes.flatten()[1], color=color)
axes.flatten()[1].set_title('Spend', fontdict=f_dict)
az.plot_posterior(prcnttake, point_estimate='mode', ax=axes.flatten()[2], color=color)
axes.flatten()[2].set_title('PrcntTake', fontdict=f_dict)
# A subset of the randomly generated predictors
for i, j in enumerate([2, 3, 4, 11, 12, 13]):
    az.plot_posterior(betaj.sel(zbetaj_dim_0=j), point_estimate='mode',
                      ax=axes.flatten()[i+3], color=color)
    axes.flatten()[i+3].set_title(X4.columns[j], fontdict=f_dict)
# Scale
az.plot_posterior(scale, point_estimate='mode', ax=axes.flatten()[9], color=color)
axes.flatten()[9].set_title(r'$\sigma$', fontdict=f_dict)
# Normality
az.plot_posterior(normality, point_estimate='mode', ax=axes.flatten()[10], color=color)
axes.flatten()[10].set_title(r'$\nu$', fontdict=f_dict)

axes.flatten()[11].set_visible(False)
fig.tight_layout()
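# To make the effect of the shrinkage prior visible in one table, a quick
# comparison (not in the original notebook) of the standardized coefficients
# under the vague-normal prior (`model4`) and the hierarchical t prior
# (`model5`): the random predictors' coefficients should be pulled noticeably
# closer to zero, while Spend and PrcntTake are much less affected.

# In[ ]:

summ_vague = az.summary(idata4, var_names=['zbetaj'])[['mean', 'sd']]
summ_shrink = az.summary(idata5, var_names=['zbetaj'])[['mean', 'sd']]
summ_vague.join(summ_shrink, lsuffix='_vague', rsuffix='_shrink')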