Diff
checker
文本
文本
图像
文档
Excel
文件夹
Legal
Enterprise
桌面版
定价
登录
下载 Diffchecker 桌面版
比较文本
查找两个文本文件之间的差异
工具
历史
实时编辑器
折叠未更改行
关闭换行
视图
拆分
统一
比对精度
智能
单词
字符
语法高亮
选择语法
忽略
文本转换
转到第一个差异
编辑输入
Diffchecker Desktop
运行Diffchecker最安全的方式。获取Diffchecker桌面应用:您的差异永远不会离开您的电脑!
获取桌面版
Logit versus Poisson
创建于
9个月前
差异永不过期
清除
导出
分享
解释
53 删除
行
总计
删除
字符
总计
删除
要继续使用此功能,请升级到
Diff
checker
Pro
查看价格
164 行
全部复制
52 添加
行
总计
添加
字符
总计
添加
要继续使用此功能,请升级到
Diff
checker
Pro
查看价格
164 行
全部复制
复制
已复制
复制
已复制
# ---- fit
logit
model with optim() ----
# ---- fit
poisson
model with optim() ----
复制
已复制
复制
已复制
# data
# data
; see ?crdata::holland2015
devtools::install_github("jrnold/ZeligData")
holland <- crdata::holland2015 |>
turnout <- ZeligData::turnout
filter(city == "santiago")
复制
已复制
复制
已复制
# formula
# formula
corresponds to model 1 for each city in holland (2015) table 2
f <-
vote
~
age
+
educate
+
income
+
race
f <-
operations
~
lower
+
vendors
+
budget
+
population
# ---- create a function to fit the model ----
# ---- create a function to fit the model ----
# log-likelihood function
# log-likelihood function
复制
已复制
复制
已复制
logit
_ll <- function(beta, y, X) {
poisson
_ll <- function(beta, y, X) {
linpred <- X%*%beta # perhaps denoted eta
linpred <- X%*%beta # perhaps denoted eta
复制
已复制
复制
已复制
p
<-
plogis
(linpred)
# pi is special in R, so I use p
lambda
<-
exp
(linpred)
ll <- sum(
dbinom
(y,
size = 1, prob
=
p
, log = TRUE))
ll <- sum(
dpois
(y,
lambda
=
lambda
, log = TRUE))
return(ll)
return(ll)
}
}
# function to fit model
# function to fit model
复制
已复制
复制
已复制
est_
logit
<- function(f, data) {
est_
poisson
<- function(f, data) {
# make X and y
# make X and y
mf <- model.frame(f, data = data)
mf <- model.frame(f, data = data)
X <- model.matrix(f, data = mf)
X <- model.matrix(f, data = mf)
y <- model.response(mf)
y <- model.response(mf)
# create starting values
# create starting values
par_start <- rep(0, ncol(X))
par_start <- rep(0, ncol(X))
# run optim()
# run optim()
est <- optim(par_start,
est <- optim(par_start,
复制
已复制
复制
已复制
fn =
logit
_ll,
fn =
poisson
_ll,
y = y,
y = y,
X = X,
X = X,
hessian = TRUE, # for SEs!
hessian = TRUE, # for SEs!
control = list(fnscale = -1),
control = list(fnscale = -1),
method = "BFGS")
method = "BFGS")
# check convergence; print warning if not
# check convergence; print warning if not
if (est$convergence != 0) print("Model did not converge!")
if (est$convergence != 0) print("Model did not converge!")
# create list of objects to return
# create list of objects to return
res <- list(beta_hat = est$par,
res <- list(beta_hat = est$par,
var_hat = solve(-est$hessian))
var_hat = solve(-est$hessian))
# return the list
# return the list
return(res)
return(res)
}
}
# fit model
# fit model
复制
已复制
复制
已复制
fit <- est_
logit
(f, data =
turnout
)
fit <- est_
poisson
(f, data =
holland
)
print(fit, digits = 2) # print estimates w/ reasonable digits
print(fit, digits = 2) # print estimates w/ reasonable digits
# ---- compute the expected value given X_c ----
# ---- compute the expected value given X_c ----
# create chosen values for X
# create chosen values for X
# note 1: naming columns helps a bit later
# note 1: naming columns helps a bit later
# note 2: can also do with f, model.matrix(..., newdata = ...)
# note 2: can also do with f, model.matrix(..., newdata = ...)
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
复制
已复制
复制
已复制
"
age
" = median(
turnout$age
),
"
lower
" = median(
holland$lower
),
"
educate"
= median(
turnout$educate
),
"
vendors"
= median(
holland$vendors
),
"
income"
= median(
turnout$income
),
"
budget"
= median(
holland$budget
),
"
white"
=
1 # white indicators = 1
"
population"
=
median(holland$population)
)
)
# function to compute qi
# function to compute qi
ev_fn <- function(beta, X) {
ev_fn <- function(beta, X) {
复制
已复制
复制
已复制
plogis
(X%*%beta)
exp
(X%*%beta)
}
}
# invariance property
# invariance property
ev_hat <- ev_fn(fit$beta_hat, X_c)
ev_hat <- ev_fn(fit$beta_hat, X_c)
# delta method
# delta method
library(numDeriv) # for grad()
library(numDeriv) # for grad()
grad <- grad(
grad <- grad(
func = ev_fn, # what function are we taking the derivative of?
func = ev_fn, # what function are we taking the derivative of?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
X = X_c) # what other values are needed?
X = X_c) # what other values are needed?
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# ---- compute the ev given X_c (w/ range of values) ----
# ---- compute the ev given X_c (w/ range of values) ----
# create chosen values for X
# create chosen values for X
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
复制
已复制
复制
已复制
"
age
" =
min(
turnout$age):
max(
turnout$age
),
"
lower
" =
seq(
min(
holland$lower),
max(
holland$lower), by = 1
),
"
educate"
= median(
turnout$educate
),
"
vendors"
= median(
holland$vendors
),
"
income"
= median(
turnout$income
),
"
budget"
= median(
holland$budget
),
"
white"
=
1 # white indicators = 1
"
population"
=
median(holland$population)
)
)
# containers for estimated quantities of interest and ses
# containers for estimated quantities of interest and ses
ev_hat <- numeric(nrow(X_c))
ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
# loop over each row of X_c and compute qi and se
# loop over each row of X_c and compute qi and se
for (i in 1:nrow(X_c)) { # for the ith row of X...
for (i in 1:nrow(X_c)) { # for the ith row of X...
# invariance property
# invariance property
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
# delta method
# delta method
grad <- grad(
grad <- grad(
func = ev_fn,
func = ev_fn,
x = fit$beta_hat,
x = fit$beta_hat,
X = X_c[i, ])
X = X_c[i, ])
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
}
}
# put X_c, qi estimates, and se estimates in data frame
# put X_c, qi estimates, and se estimates in data frame
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
data.frame() |>
data.frame() |>
glimpse()
glimpse()
# plot
# plot
复制
已复制
复制
已复制
ggplot(qi, aes(x =
age
, y = ev_hat,
ggplot(qi, aes(x =
lower
, y = ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymax = ev_hat + 1.64*se_ev_hat)) +
ymax = ev_hat + 1.64*se_ev_hat)) +
geom_ribbon() +
geom_ribbon() +
geom_line()
geom_line()
# ---- compute first difference ----
# ---- compute first difference ----
# make X_lo
# make X_lo
X_lo <- cbind(
X_lo <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
复制
已复制
复制
已复制
"
age
" = quantile(
turnout$age
, probs = 0.25),
# 31 years old; 25th percentile
"
lower
" = quantile(
holland$lower
, probs = 0.25),
"
educate"
= median(
turnout$educate
),
"
vendors"
= median(
holland$vendors
),
"
income"
= median(
turnout$income
),
"
budget"
= median(
holland$budget
),
"
white"
=
1 # white indicators = 1
"
population"
=
median(holland$population)
)
)
# make X_hi by modifying the relevant value of X_lo
# make X_hi by modifying the relevant value of X_lo
X_hi <- X_lo
X_hi <- X_lo
复制
已复制
复制
已复制
X_hi[, "
age
"] <- quantile(
turnout$age
, probs = 0.75)
# 59 years old; 75th percentile
X_hi[, "
lower
"] <- quantile(
holland$lower
, probs = 0.75)
# function to compute first difference
# function to compute first difference
fd_fn <- function(beta, hi, lo) {
fd_fn <- function(beta, hi, lo) {
复制
已复制
复制
已复制
plogis
(hi%*%beta) -
plogis
(lo%*%beta)
exp
(hi%*%beta) -
exp
(lo%*%beta)
}
}
# invariance property
# invariance property
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
# delta method
# delta method
grad <- grad(
grad <- grad(
func = fd_fn,
func = fd_fn,
x = fit$beta_hat,
x = fit$beta_hat,
hi = X_hi,
hi = X_hi,
lo = X_lo)
lo = X_lo)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# estimated fd
# estimated fd
fd_hat
fd_hat
# estimated se
# estimated se
se_fd_hat
se_fd_hat
# 90% ci
# 90% ci
fd_hat - 1.64*se_fd_hat # lower
fd_hat - 1.64*se_fd_hat # lower
fd_hat + 1.64*se_fd_hat # upper
fd_hat + 1.64*se_fd_hat # upper
已保存差异
原始文本
打开文件
# ---- fit logit model with optim() ---- # data devtools::install_github("jrnold/ZeligData") turnout <- ZeligData::turnout # formula f <- vote ~ age + educate + income + race # ---- create a function to fit the model ---- # log-likelihood function logit_ll <- function(beta, y, X) { linpred <- X%*%beta # perhaps denoted eta p <- plogis(linpred) # pi is special in R, so I use p ll <- sum(dbinom(y, size = 1, prob = p, log = TRUE)) return(ll) } # function to fit model est_logit <- function(f, data) { # make X and y mf <- model.frame(f, data = data) X <- model.matrix(f, data = mf) y <- model.response(mf) # create starting values par_start <- rep(0, ncol(X)) # run optim() est <- optim(par_start, fn = logit_ll, y = y, X = X, hessian = TRUE, # for SEs! control = list(fnscale = -1), method = "BFGS") # check convergence; print warning if not if (est$convergence != 0) print("Model did not converge!") # create list of objects to return res <- list(beta_hat = est$par, var_hat = solve(-est$hessian)) # return the list return(res) } # fit model fit <- est_logit(f, data = turnout) print(fit, digits = 2) # print estimates w/ reasonable digits # ---- compute the expected value given X_c ---- # create chosen values for X # note 1: naming columns helps a bit later # note 2: can also do with f, model.matrix(..., newdata = ...) X_c <- cbind( "constant" = 1, # intercept "age" = median(turnout$age), "educate" = median(turnout$educate), "income" = median(turnout$income), "white" = 1 # white indicators = 1 ) # function to compute qi ev_fn <- function(beta, X) { plogis(X%*%beta) } # invariance property ev_hat <- ev_fn(fit$beta_hat, X_c) # delta method library(numDeriv) # for grad() grad <- grad( func = ev_fn, # what function are we taking the derivative of? x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.? X = X_c) # what other values are needed? se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad) # ---- compute the ev given X_c (w/ range of values) ---- # create chosen values for X X_c <- cbind( "constant" = 1, # intercept "age" = min(turnout$age):max(turnout$age), "educate" = median(turnout$educate), "income" = median(turnout$income), "white" = 1 # white indicators = 1 ) # containers for estimated quantities of interest and ses ev_hat <- numeric(nrow(X_c)) se_ev_hat <- numeric(nrow(X_c)) # loop over each row of X_c and compute qi and se for (i in 1:nrow(X_c)) { # for the ith row of X... # invariance property ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ]) # delta method grad <- grad( func = ev_fn, x = fit$beta_hat, X = X_c[i, ]) se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad) } # put X_c, qi estimates, and se estimates in data frame qi <- cbind(X_c, ev_hat, se_ev_hat) |> data.frame() |> glimpse() # plot ggplot(qi, aes(x = age, y = ev_hat, ymin = ev_hat - 1.64*se_ev_hat, ymax = ev_hat + 1.64*se_ev_hat)) + geom_ribbon() + geom_line() # ---- compute first difference ---- # make X_lo X_lo <- cbind( "constant" = 1, # intercept "age" = quantile(turnout$age, probs = 0.25), # 31 years old; 25th percentile "educate" = median(turnout$educate), "income" = median(turnout$income), "white" = 1 # white indicators = 1 ) # make X_hi by modifying the relevant value of X_lo X_hi <- X_lo X_hi[, "age"] <- quantile(turnout$age, probs = 0.75) # 59 years old; 75th percentile # function to compute first difference fd_fn <- function(beta, hi, lo) { plogis(hi%*%beta) - plogis(lo%*%beta) } # invariance property fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo) # delta method grad <- grad( func = fd_fn, x = fit$beta_hat, hi = X_hi, lo = X_lo) se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad) # estimated fd fd_hat # estimated se se_fd_hat # 90% ci fd_hat - 1.64*se_fd_hat # lower fd_hat + 1.64*se_fd_hat # upper
更改后文本
打开文件
# ---- fit poisson model with optim() ---- # data; see ?crdata::holland2015 holland <- crdata::holland2015 |> filter(city == "santiago") # formula corresponds to model 1 for each city in holland (2015) table 2 f <- operations ~ lower + vendors + budget + population # ---- create a function to fit the model ---- # log-likelihood function poisson_ll <- function(beta, y, X) { linpred <- X%*%beta # perhaps denoted eta lambda <- exp(linpred) ll <- sum(dpois(y, lambda = lambda, log = TRUE)) return(ll) } # function to fit model est_poisson <- function(f, data) { # make X and y mf <- model.frame(f, data = data) X <- model.matrix(f, data = mf) y <- model.response(mf) # create starting values par_start <- rep(0, ncol(X)) # run optim() est <- optim(par_start, fn = poisson_ll, y = y, X = X, hessian = TRUE, # for SEs! control = list(fnscale = -1), method = "BFGS") # check convergence; print warning if not if (est$convergence != 0) print("Model did not converge!") # create list of objects to return res <- list(beta_hat = est$par, var_hat = solve(-est$hessian)) # return the list return(res) } # fit model fit <- est_poisson(f, data = holland) print(fit, digits = 2) # print estimates w/ reasonable digits # ---- compute the expected value given X_c ---- # create chosen values for X # note 1: naming columns helps a bit later # note 2: can also do with f, model.matrix(..., newdata = ...) X_c <- cbind( "constant" = 1, # intercept "lower" = median(holland$lower), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # function to compute qi ev_fn <- function(beta, X) { exp(X%*%beta) } # invariance property ev_hat <- ev_fn(fit$beta_hat, X_c) # delta method library(numDeriv) # for grad() grad <- grad( func = ev_fn, # what function are we taking the derivative of? x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.? X = X_c) # what other values are needed? se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad) # ---- compute the ev given X_c (w/ range of values) ---- # create chosen values for X X_c <- cbind( "constant" = 1, # intercept "lower" = seq(min(holland$lower), max(holland$lower), by = 1), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # containers for estimated quantities of interest and ses ev_hat <- numeric(nrow(X_c)) se_ev_hat <- numeric(nrow(X_c)) # loop over each row of X_c and compute qi and se for (i in 1:nrow(X_c)) { # for the ith row of X... # invariance property ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ]) # delta method grad <- grad( func = ev_fn, x = fit$beta_hat, X = X_c[i, ]) se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad) } # put X_c, qi estimates, and se estimates in data frame qi <- cbind(X_c, ev_hat, se_ev_hat) |> data.frame() |> glimpse() # plot ggplot(qi, aes(x = lower, y = ev_hat, ymin = ev_hat - 1.64*se_ev_hat, ymax = ev_hat + 1.64*se_ev_hat)) + geom_ribbon() + geom_line() # ---- compute first difference ---- # make X_lo X_lo <- cbind( "constant" = 1, # intercept "lower" = quantile(holland$lower, probs = 0.25), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # make X_hi by modifying the relevant value of X_lo X_hi <- X_lo X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75) # function to compute first difference fd_fn <- function(beta, hi, lo) { exp(hi%*%beta) - exp(lo%*%beta) } # invariance property fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo) # delta method grad <- grad( func = fd_fn, x = fit$beta_hat, hi = X_hi, lo = X_lo) se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad) # estimated fd fd_hat # estimated se se_fd_hat # 90% ci fd_hat - 1.64*se_fd_hat # lower fd_hat + 1.64*se_fd_hat # upper
查找差异