Logit versus Poisson

Created Diff never expires
51 हटाए गए
लाइनें
कुल
हटाया गया
शब्द
कुल
हटाया गया
इस सुविधा का उपयोग जारी रखने के लिए, अपग्रेड करें
Diffchecker logo
Diffchecker Pro
164 लाइनें
51 जोड़े गए
लाइनें
कुल
जोड़ा गया
शब्द
कुल
जोड़ा गया
इस सुविधा का उपयोग जारी रखने के लिए, अपग्रेड करें
Diffchecker logo
Diffchecker Pro
164 लाइनें


# ---- fit logit model with optim() ----
# ---- fit poisson model with optim() ----


# data
# data; see ?crdata::holland2015
devtools::install_github("jrnold/ZeligData")
holland <- crdata::holland2015 |>
turnout <- ZeligData::turnout
filter(city == "santiago")


# formula
# formula corresponds to model 1 for each city in holland (2015) table 2
f <- vote ~ age + educate + income + race
f <- operations ~ lower + vendors + budget + population


# ---- create a function to fit the model ----
# ---- create a function to fit the model ----


# log-likelihood function
# log-likelihood function
logit_ll <- function(beta, y, X) {
poisson_ll <- function(beta, y, X) {
linpred <- X%*%beta # perhaps denoted eta
linpred <- X%*%beta # perhaps denoted eta
p <- plogis(linpred) # pi is special in R, so I use p
lambda <- exp(linpred)
ll <- sum(dbinom(y, size = 1, prob = p, log = TRUE))
ll <- sum(dpois(y, lambda = lambda, log = TRUE))
return(ll)
return(ll)
}
}


# function to fit model
# function to fit model
est_logit <- function(f, data) {
est_poisson <- function(f, data) {
# make X and y
# make X and y
mf <- model.frame(f, data = data)
mf <- model.frame(f, data = data)
X <- model.matrix(f, data = mf)
X <- model.matrix(f, data = mf)
y <- model.response(mf)
y <- model.response(mf)
# create starting values
# create starting values
par_start <- rep(0, ncol(X))
par_start <- rep(0, ncol(X))
# run optim()
# run optim()
est <- optim(par_start,
est <- optim(par_start,
fn = logit_ll,
fn = poisson_ll,
y = y,
y = y,
X = X,
X = X,
hessian = TRUE, # for SEs!
hessian = TRUE, # for SEs!
control = list(fnscale = -1),
control = list(fnscale = -1),
method = "BFGS")
method = "BFGS")
# check convergence; print warning if not
# check convergence; print warning if not
if (est$convergence != 0) print("Model did not converge!")
if (est$convergence != 0) print("Model did not converge!")
# create list of objects to return
# create list of objects to return
res <- list(beta_hat = est$par,
res <- list(beta_hat = est$par,
var_hat = solve(-est$hessian))
var_hat = solve(-est$hessian))
# return the list
# return the list
return(res)
return(res)
}
}


# fit model
# fit model
fit <- est_logit(f, data = turnout)
fit <- est_poisson(f, data = holland)
print(fit, digits = 2) # print estimates w/ reasonable digits
print(fit, digits = 2) # print estimates w/ reasonable digits


# ---- compute the expected value given X_c ----
# ---- compute the expected value given X_c ----


# create chosen values for X
# create chosen values for X
# note 1: naming columns helps a bit later
# note 1: naming columns helps a bit later
# note 2: can also do with f, model.matrix(..., newdata = ...)
# note 2: can also do with f, model.matrix(..., newdata = ...)
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = median(turnout$age),
"lower" = median(holland$lower),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)


# function to compute qi
# function to compute qi
ev_fn <- function(beta, X) {
ev_fn <- function(beta, X) {
plogis(X%*%beta)
exp(X%*%beta)
}
}


# invariance property
# invariance property
ev_hat <- ev_fn(fit$beta_hat, X_c)
ev_hat <- ev_fn(fit$beta_hat, X_c)


# delta method
# delta method
library(numDeriv) # for grad()
library(numDeriv) # for grad()
grad <- grad(
grad <- grad(
func = ev_fn, # what function are we taking the derivative of?
func = ev_fn, # what function are we taking the derivative of?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
X = X_c) # what other values are needed?
X = X_c) # what other values are needed?
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)


# ---- compute the ev given X_c (w/ range of values) ----
# ---- compute the ev given X_c (w/ range of values) ----


# create chosen values for X
# create chosen values for X
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = min(turnout$age):max(turnout$age),
"lower" = seq(min(holland$lower), max(holland$lower), by = 1),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)


# containers for estimated quantities of interest and ses
# containers for estimated quantities of interest and ses
ev_hat <- numeric(nrow(X_c))
ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))


# loop over each row of X_c and compute qi and se
# loop over each row of X_c and compute qi and se
for (i in 1:nrow(X_c)) { # for the ith row of X...
for (i in 1:nrow(X_c)) { # for the ith row of X...
# invariance property
# invariance property
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
# delta method
# delta method
grad <- grad(
grad <- grad(
func = ev_fn,
func = ev_fn,
x = fit$beta_hat,
x = fit$beta_hat,
X = X_c[i, ])
X = X_c[i, ])
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
}
}


# put X_c, qi estimates, and se estimates in data frame
# put X_c, qi estimates, and se estimates in data frame
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
data.frame() |>
data.frame() |>
glimpse()
glimpse()


# plot
# plot
ggplot(qi, aes(x = age, y = ev_hat,
ggplot(qi, aes(x = lower, y = ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymax = ev_hat + 1.64*se_ev_hat)) +
ymax = ev_hat + 1.64*se_ev_hat)) +
geom_ribbon() +
geom_ribbon() +
geom_line()
geom_line()


# ---- compute first difference ----
# ---- compute first difference ----


# make X_lo
# make X_lo
X_lo <- cbind(
X_lo <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"age" = quantile(turnout$age, probs = 0.25), # 31 years old; 25th percentile
"lower" = quantile(holland$lower, probs = 0.25),
"educate" = median(turnout$educate),
"vendors" = median(holland$vendors),
"income" = median(turnout$income),
"budget" = median(holland$budget),
"white" = 1 # white indicators = 1
"population" = median(holland$population)
)
)


# make X_hi by modifying the relevant value of X_lo
# make X_hi by modifying the relevant value of X_lo
X_hi <- X_lo
X_hi <- X_lo
X_hi[, "age"] <- quantile(turnout$age, probs = 0.75) # 59 years old; 75th percentile
X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75)


# function to compute first difference
# function to compute first difference
fd_fn <- function(beta, hi, lo) {
fd_fn <- function(beta, hi, lo) {
plogis(hi%*%beta) - plogis(lo%*%beta)
exp(hi%*%beta) - exp(lo%*%beta)
}
}


# invariance property
# invariance property
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)


# delta method
# delta method
grad <- grad(
grad <- grad(
func = fd_fn,
func = fd_fn,
x = fit$beta_hat,
x = fit$beta_hat,
hi = X_hi,
hi = X_hi,
lo = X_lo)
lo = X_lo)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)


# estimated fd
# estimated fd
fd_hat
fd_hat


# estimated se
# estimated se
se_fd_hat
se_fd_hat


# 90% ci
# 90% ci
fd_hat - 1.64*se_fd_hat # lower
fd_hat - 1.64*se_fd_hat # lower
fd_hat + 1.64*se_fd_hat # upper
fd_hat + 1.64*se_fd_hat # upper