Question 4
2023-08-28
y <- c(0,1,2,3,1,5,10,17,23,31,20,25,37,45)
x <- c(1:14)
n <- length(x)
one <- c(rep(1,n))
X <- cbind(one,x)
beta <- matrix(c(0.3, 0.3))#starting values
r <- 10
iter <- 25
result <- matrix(0, iter, 8)
for (i in 1:iter){
eta = X%*%beta
mu = exp(eta)
v = mu*(mu+r)/r
W = diag(c((mu*r)/(mu+r)))
z = eta+(y-mu)/mu
XWX = t(X)%*%W%*%X
XWXI = solve(XWX)
XWZ = t(X)%*%W%*%z
beta = XWXI%*%XWZ
b0 = beta[1,]
b1 = beta[2,]
b_se = sqrt(diag(XWXI))
names(beta) = NULL
names(b_se) = NULL
# NR procedure for calculating r
eta = X%*%beta
mu = exp(eta)
l = sum(log(gamma(y + r))) - sum(log(gamma(y + 1))) - n*log(gamma(r)) + r*sum(log(r/(mu + r))) + sum(y
dl = sum(digamma(y + r)) - n*digamma(r) + sum(log(r/(mu + r))) + sum(mu/(r + mu)) - sum(y/(r + mu))
dl2 = sum(trigamma(y + r)) - n*trigamma(r) + sum(mu/(r*(r + mu))) - sum(mu/(r + mu)ˆ2) + sum(y/(r + mu
r = r - dl/dl2
r_se = sqrt(-1/dl2)
result[i, ] = c(i, r, r_se, l, b0, b1, b_se[1], b_se[2])
}
colnames(result) = c("iter", "r", "SE(r)", "logL", "beta_0", "beta_1", "SE(beta_0)", "SE(beta_1)")
round(result,3)
1
## iter r SE(r) logL beta_0 beta_1 SE(beta_0) SE(beta_1)
## [1,] 1 10.557 6.626 -39.047 -0.015 0.301 0.323 0.032
## [2,] 2 11.599 7.308 -38.905 -0.013 0.295 0.352 0.034
## [3,] 3 12.233 8.526 -38.886 0.021 0.291 0.349 0.034
## [4,] 4 12.479 9.330 -38.882 0.035 0.290 0.344 0.033
## [5,] 5 12.558 9.657 -38.881 0.039 0.289 0.342 0.033
## [6,] 6 12.581 9.763 -38.881 0.041 0.289 0.341 0.033
## [7,] 7 12.589 9.795 -38.881 0.042 0.289 0.341 0.033
## [8,] 8 12.591 9.805 -38.881 0.042 0.289 0.341 0.033
## [9,] 9 12.592 9.808 -38.881 0.042 0.289 0.341 0.033
## [10,] 10 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [11,] 11 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [12,] 12 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [13,] 13 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [14,] 14 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [15,] 15 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [16,] 16 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [17,] 17 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [18,] 18 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [19,] 19 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [20,] 20 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [21,] 21 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [22,] 22 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [23,] 23 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [24,] 24 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [25,] 25 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
# Tidying up with some dataframe manipulation
result <- [Link](result)
r_fin <- result[iter,]
data <- [Link](cbind(x,y))
glmNB <- [Link](y ~ x, control = [Link](maxit = 25, trace = T))
## Theta(1) = 17.086900, 2(Ls - Lm) = 16.831100
## Theta(2) = 13.402300, 2(Ls - Lm) = 15.387900
## Theta(3) = 13.402300, 2(Ls - Lm) = 15.368300
## Theta(4) = 12.760300, 2(Ls - Lm) = 15.072000
## Theta(5) = 12.760200, 2(Ls - Lm) = 15.071100
## Theta(6) = 12.627800, 2(Ls - Lm) = 15.008000
## Theta(7) = 12.627800, 2(Ls - Lm) = 15.008000
## Theta(8) = 12.599700, 2(Ls - Lm) = 14.994500
## Theta(9) = 12.599600, 2(Ls - Lm) = 14.994500
2
## Theta(10) = 12.593600, 2(Ls - Lm) = 14.991600
## Theta(11) = 12.593600, 2(Ls - Lm) = 14.991600
## Theta(12) = 12.592400, 2(Ls - Lm) = 14.991000
## Theta(13) = 12.592300, 2(Ls - Lm) = 14.990900
## Theta(14) = 12.592000, 2(Ls - Lm) = 14.990800
## Theta(15) = 12.592000, 2(Ls - Lm) = 14.990800
## Theta(16) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(17) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(18) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(19) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(20) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(21) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(22) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(23) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(24) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(25) = 12.591900, 2(Ls - Lm) = 14.990800
## Warning in [Link](y ~ x, control = [Link](maxit = 25, trace = T)):
## alternation limit reached
summary(glmNB)
##
## Call:
## [Link](formula = y ~ x, control = [Link](maxit = 25, trace = T),
## [Link] = 12.59189515, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04179 0.34097 0.123 0.902
## x 0.28892 0.03308 8.734 <2e-16 ***
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##
3
## (Dispersion parameter for Negative Binomial(12.5919) family taken to be 1)
##
## Null deviance: 104.160 on 13 degrees of freedom
## Residual deviance: 14.991 on 12 degrees of freedom
## AIC: 83.762
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 12.59
## Std. Err.: 9.81
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -77.762
pred_NB <- predict(glmNB, newdata = [Link](x), type = 'response')
pred_NB
## 1 2 3 4 5 6 7 8
## 1.391958 1.858250 2.480745 3.311770 4.421179 5.902230 7.879419 10.518945
## 9 10 11 12 13 14
## 14.042687 18.746847 25.026853 33.410599 44.602815 59.544313
var_func <- pred_NB*(pred_NB + r)/r
# Convergence Plots
p1 <- ggplot(data = result) +
aes(x = iter, y = beta_0) +
geom_line() +
labs(x = 'Iteration Number', y = 'beta_0')
p2 <- ggplot(data = result) +
aes(x = iter, y = beta_1) +
geom_line() +
labs(x = 'Iteration Number', y = 'beta_1')
p3 <- ggplot(data = result) +
aes(x = iter, y = r) +
geom_line() +
labs(x = 'Iteration Number', y = 'r')
p4 <- ggplot(data = result) +
aes(x = iter, y = logL) +
geom_line() +
labs(x = 'Iteration Number', y = 'log-likelihood value')
[Link](p1, p2, p3, p4, nrow = 2, ncol = 2, top = textGrob("Convergence Plots", gp = gpar(fontsize
4
Convergence Plots
0.04
0.300
beta_0
beta_1
0.02 0.296
0.00 0.292
0 5 10 15 20 25 0 5 10 15 20 25
Iteration Number Iteration Number
12.5
log−likelihood value
−38.90
12.0
−38.95
r
11.5
−39.00
11.0
10.5 −39.05
0 5 10 15 20 25 0 5 10 15 20 25
Iteration Number Iteration Number
# Fitted Line Plots
ggplot(data, aes(x = x, y = y)) +
geom_point() +
geom_smooth(method = "glm", [Link] = list(family = poisson), aes(colour = 'Poisson Mean'), linety
geom_line(aes(y = pred_NB, colour = 'Neg-Bin Mean'), linetype = 1) +
geom_line(aes(y = var_func, colour = 'Neg-Bin Variance'), linetype = 2) +
scale_colour_manual(name="Function",values= c('red', 'blue', 'black')) +
labs(title = 'Fitted Line Plots') +
theme([Link] = element_text(hjust = 0.5)) +
ylim(0, 80)
## ‘geom_smooth()‘ using formula = ’y ~ x’
## Warning: Removed 3 rows containing missing values (‘geom_line()‘).
5
Fitted Line Plots
80
60
Function
Neg−Bin Mean
40
y
Neg−Bin Variance
Poisson Mean
20
5 10
x
glmNB <- [Link](y ~ x, control = [Link](maxit = 25, trace = T))
## Theta(1) = 17.086900, 2(Ls - Lm) = 16.831100
## Theta(2) = 13.402300, 2(Ls - Lm) = 15.387900
## Theta(3) = 13.402300, 2(Ls - Lm) = 15.368300
## Theta(4) = 12.760300, 2(Ls - Lm) = 15.072000
## Theta(5) = 12.760200, 2(Ls - Lm) = 15.071100
## Theta(6) = 12.627800, 2(Ls - Lm) = 15.008000
## Theta(7) = 12.627800, 2(Ls - Lm) = 15.008000
## Theta(8) = 12.599700, 2(Ls - Lm) = 14.994500
## Theta(9) = 12.599600, 2(Ls - Lm) = 14.994500
## Theta(10) = 12.593600, 2(Ls - Lm) = 14.991600
## Theta(11) = 12.593600, 2(Ls - Lm) = 14.991600
6
Table of Estimates
Estimate Std. Error
beta_0 0.0417874 0.3409696
beta_1 0.2889238 0.0330796
r 12.5918952 9.8090134
## Theta(12) = 12.592400, 2(Ls - Lm) = 14.991000
## Theta(13) = 12.592300, 2(Ls - Lm) = 14.990900
## Theta(14) = 12.592000, 2(Ls - Lm) = 14.990800
## Theta(15) = 12.592000, 2(Ls - Lm) = 14.990800
## Theta(16) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(17) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(18) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(19) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(20) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(21) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(22) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(23) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(24) = 12.591900, 2(Ls - Lm) = 14.990800
## Theta(25) = 12.591900, 2(Ls - Lm) = 14.990800
## Warning in [Link](y ~ x, control = [Link](maxit = 25, trace = T)):
## alternation limit reached
sum <- [Link](summary(glmNB)$coefficients[,1:2])
rownames(sum) <- c('beta_0', 'beta_1')
mid <- [Link](cbind(summary(glmNB)$theta, summary(glmNB)$[Link]))
rownames(mid) <- 'r'
colnames(mid) <- colnames(sum)
tab <- rbind(sum, mid)
tab |> kbl() |> kable_styling() |> add_header_above(header = c("Table of Estimates" = 3))