Logit versus Poisson
164 lines
# ---- fit logit model with optim() ----
# ---- fit poisson model with optim() ----
# data
# data; see ?crdata::holland2015
devtools::install_github("jrnold/ZeligData")
holland <- crdata::holland2015 |>
turnout <- ZeligData::turnout
filter(city == "santiago")
# formula
# formula corresponds to model 1 for each city in holland (2015) table 2
f <- vote ~ age + educate + income + race
f <- operations ~ lower + vendors + budget + population
# ---- create a function to fit the model ----
# ---- create a function to fit the model ----
# log-likelihood function
# log-likelihood function
logit_ll <- function(beta, y, X) {
poisson_ll <- function(beta, y, X) {
linpred <- X%*%beta # perhaps denoted eta
linpred <- X%*%beta # perhaps denoted eta
p <- plogis(linpred) # pi is special in R, so I use p
lambda <- exp(linpred)
ll <- sum(dbinom(y, size = 1, prob = p, log = TRUE))
ll <- sum(dpois(y, lambda = lambda, log = TRUE))
return(ll)
return(ll)
}
}
# function to fit model
# function to fit model
est_logit <- function(f, data) {
est_poisson <- function(f, data) {
# make X and y
# make X and y
mf <- model.frame(f, data = data)
mf <- model.frame(f, data = data)
X <- model.matrix(f, data = mf)
X <- model.matrix(f, data = mf)
y <- model.response(mf)
y <- model.response(mf)
# create starting values
# create starting values
par_start <- rep(0, ncol(X))
par_start <- rep(0, ncol(X))
# run optim()
# run optim()
est <- optim(par_start,
est <- optim(par_start,
fn = logit_ll,
fn = poisson_ll,
y = y,
y = y,
X = X,
X = X,
hessian = TRUE, # for SEs!
hessian = TRUE, # for SEs!
control = list(fnscale = -1),
control = list(fnscale = -1),
method = "BFGS")
method = "BFGS")
# check convergence; print warning if not
# check convergence; print warning if not
if (est$convergence != 0) print("Model did not converge!")
if (est$convergence != 0) print("Model did not converge!")
# create list of objects to return
# create list of objects to return
res <- list(beta_hat = est$par,
res <- list(beta_hat = est$par,
var_hat = solve(-est$hessian))
var_hat = solve(-est$hessian))
# return the list
# return the list
return(res)
return(res)
}
}
# fit model
# fit model
fit <- est_logit(f, data = turnout)
fit <- est_poisson(f, data = holland)
print(fit, digits = 2) # print estimates w/ reasonable digits
print(fit, digits = 2) # print estimates w/ reasonable digits
# ---- compute the expected value given X_c ----
# ---- compute the expected value given X_c ----
# create chosen values for X
# create chosen values for X
# note 1: naming columns helps a bit later
# note 1: naming columns helps a bit later
# note 2: can also do with f, model.matrix(..., newdata = ...)
# note 2: can also do with f, model.matrix(..., newdata = ...)
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = median(turnout$age),
"lower" = median(holland$lower),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)
# function to compute qi
# function to compute qi
ev_fn <- function(beta, X) {
ev_fn <- function(beta, X) {
plogis(X%*%beta)
exp(X%*%beta)
}
}
# invariance property
# invariance property
ev_hat <- ev_fn(fit$beta_hat, X_c)
ev_hat <- ev_fn(fit$beta_hat, X_c)
# delta method
# delta method
library(numDeriv) # for grad()
library(numDeriv) # for grad()
grad <- grad(
grad <- grad(
func = ev_fn, # what function are we taking the derivative of?
func = ev_fn, # what function are we taking the derivative of?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
X = X_c) # what other values are needed?
X = X_c) # what other values are needed?
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# ---- compute the ev given X_c (w/ range of values) ----
# ---- compute the ev given X_c (w/ range of values) ----
# create chosen values for X
# create chosen values for X
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = min(turnout$age):max(turnout$age),
"lower" = seq(min(holland$lower), max(holland$lower), by = 1),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)
# containers for estimated quantities of interest and ses
# containers for estimated quantities of interest and ses
ev_hat <- numeric(nrow(X_c))
ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
# loop over each row of X_c and compute qi and se
# loop over each row of X_c and compute qi and se
for (i in 1:nrow(X_c)) { # for the ith row of X...
for (i in 1:nrow(X_c)) { # for the ith row of X...
# invariance property
# invariance property
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
# delta method
# delta method
grad <- grad(
grad <- grad(
func = ev_fn,
func = ev_fn,
x = fit$beta_hat,
x = fit$beta_hat,
X = X_c[i, ])
X = X_c[i, ])
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
}
}
# put X_c, qi estimates, and se estimates in data frame
# put X_c, qi estimates, and se estimates in data frame
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
data.frame() |>
data.frame() |>
glimpse()
glimpse()
# plot
# plot
ggplot(qi, aes(x = age, y = ev_hat,
ggplot(qi, aes(x = lower, y = ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymax = ev_hat + 1.64*se_ev_hat)) +
ymax = ev_hat + 1.64*se_ev_hat)) +
geom_ribbon() +
geom_ribbon() +
geom_line()
geom_line()
# ---- compute first difference ----
# ---- compute first difference ----
# make X_lo
# make X_lo
X_lo <- cbind(
X_lo <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = quantile(turnout$age, probs = 0.25), # 31 years old; 25th percentile
"lower" = quantile(holland$lower, probs = 0.25),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)
# make X_hi by modifying the relevant value of X_lo
# make X_hi by modifying the relevant value of X_lo
X_hi <- X_lo
X_hi <- X_lo
X_hi[, "age"] <- quantile(turnout$age, probs = 0.75) # 59 years old; 75th percentile
X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75)
# function to compute first difference
# function to compute first difference
fd_fn <- function(beta, hi, lo) {
fd_fn <- function(beta, hi, lo) {
plogis(hi%*%beta) - plogis(lo%*%beta)
exp(hi%*%beta) - exp(lo%*%beta)
}
}
# invariance property
# invariance property
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
# delta method
# delta method
grad <- grad(
grad <- grad(
func = fd_fn,
func = fd_fn,
x = fit$beta_hat,
x = fit$beta_hat,
hi = X_hi,
hi = X_hi,
lo = X_lo)
lo = X_lo)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# estimated fd
# estimated fd
fd_hat
fd_hat
# estimated se
# estimated se
se_fd_hat
se_fd_hat
# 90% ci
# 90% ci
fd_hat - 1.64*se_fd_hat # lower
fd_hat - 1.64*se_fd_hat # lower
fd_hat + 1.64*se_fd_hat # upper
fd_hat + 1.64*se_fd_hat # upper