Diff
checker
텍스트
텍스트
이미지
문서
Excel
폴더
Legal
Enterprise
데스크톱
요금제
로그인
데스크톱 앱 다운로드
텍스트 비교
두 텍스트 파일의 차이점을 찾아보세요
도구
기록
실시간 편집
변경 없는 행 숨기기
줄바꿈 비활성화
레이아웃
나란히 보기
합쳐 보기
비교 단위
스마트
단어
글자
구문 강조
언어 선택
제외
텍스트 변환
첫 변경으로
수정
Diffchecker Desktop
가장 안전하게 Diffchecker를 사용하는 방법. 데스크톱 앱을 사용하면 비교 데이터가 외부로 전송되지 않습니다!
데스크톱 앱 받기
Poisson versus NB
생성일
9개월 전
비교 결과 만료 없음
초기화
내보내기
공유
설명
13 삭제
행
총
삭제
글자
총
삭제
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
164 행
복사
21 추가
행
총
추가
글자
총
추가
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
168 행
복사
복사
복사됨
복사
복사됨
# ---- fit
negative binomial
model with optim() ----
# ---- fit
poisson
model with optim() ----
# data; see ?crdata::holland2015
# data; see ?crdata::holland2015
holland <- crdata::holland2015 |>
holland <- crdata::holland2015 |>
filter(city == "santiago")
filter(city == "santiago")
# formula corresponds to model 1 for each city in holland (2015) table 2
# formula corresponds to model 1 for each city in holland (2015) table 2
f <- operations ~ lower + vendors + budget + population
f <- operations ~ lower + vendors + budget + population
# ---- create a function to fit the model ----
# ---- create a function to fit the model ----
복사
복사됨
복사
복사됨
# log-likelihood function
# log-likelihood function
(NB2: Var(y|X)=mu + mu^2/theta)
poisson
_ll <- function(
beta
, y, X) {
nb
_ll <- function(
par
, y, X) {
beta <- par[1:ncol(X)]
theta <- par[ncol(X) + 1]
linpred <- X%*%beta # perhaps denoted eta
linpred <- X%*%beta # perhaps denoted eta
복사
복사됨
복사
복사됨
lambda
<- exp(linpred)
mu
<- exp(linpred)
ll <- sum(
dpois
(y,
lambda
=
lambda
, log = TRUE))
ll <- sum(
dnbinom
(y,
mu
=
mu, size = theta
, log = TRUE))
return(ll)
return(ll)
}
}
# function to fit model
# function to fit model
복사
복사됨
복사
복사됨
est_
poisson
<- function(f, data) {
est_
nb
<- function(f, data) {
# make X and y
# make X and y
mf <- model.frame(f, data = data)
mf <- model.frame(f, data = data)
X <- model.matrix(f, data = mf)
X <- model.matrix(f, data = mf)
y <- model.response(mf)
y <- model.response(mf)
# create starting values
# create starting values
복사
복사됨
복사
복사됨
par_start <-
rep(0, ncol(X))
par_start <-
c(
rep(0, ncol(X))
, 1) # append theta start (>0)
# run optim()
# run optim()
est <- optim(par_start,
est <- optim(par_start,
복사
복사됨
복사
복사됨
fn =
poisson
_ll,
fn =
nb
_ll,
y = y,
y = y,
X = X,
X = X,
hessian = TRUE, # for SEs!
hessian = TRUE, # for SEs!
control = list(fnscale = -1),
control = list(fnscale = -1),
method = "BFGS")
method = "BFGS")
# check convergence; print warning if not
# check convergence; print warning if not
if (est$convergence != 0) print("Model did not converge!")
if (est$convergence != 0) print("Model did not converge!")
# create list of objects to return
# create list of objects to return
복사
복사됨
복사
복사됨
res <- list(beta_hat = est$par
,
k <- ncol(X)
var_hat = solve(-est$hessian)
)
res <- list(beta_hat = est$par
[1:k]
,
var_hat = solve(-est$hessian)
[1:k, 1:k],
theta_hat = est$par[k + 1],
var_theta_hat = solve(-est$hessian)[k + 1, k + 1])
# return the list
# return the list
return(res)
return(res)
}
}
# fit model
# fit model
fit <- est_poisson(f, data = holland)
fit <- est_poisson(f, data = holland)
print(fit, digits = 2) # print estimates w/ reasonable digits
print(fit, digits = 2) # print estimates w/ reasonable digits
# ---- compute the expected value given X_c ----
# ---- compute the expected value given X_c ----
# create chosen values for X
# create chosen values for X
# note 1: naming columns helps a bit later
# note 1: naming columns helps a bit later
# note 2: can also do with f, model.matrix(..., newdata = ...)
# note 2: can also do with f, model.matrix(..., newdata = ...)
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"lower" = median(holland$lower),
"lower" = median(holland$lower),
"vendors" = median(holland$vendors),
"vendors" = median(holland$vendors),
"budget" = median(holland$budget),
"budget" = median(holland$budget),
"population" = median(holland$population)
"population" = median(holland$population)
)
)
# function to compute qi
# function to compute qi
ev_fn <- function(beta, X) {
ev_fn <- function(beta, X) {
exp(X%*%beta)
exp(X%*%beta)
}
}
# invariance property
# invariance property
ev_hat <- ev_fn(fit$beta_hat, X_c)
ev_hat <- ev_fn(fit$beta_hat, X_c)
# delta method
# delta method
library(numDeriv) # for grad()
library(numDeriv) # for grad()
grad <- grad(
grad <- grad(
func = ev_fn, # what function are we taking the derivative of?
func = ev_fn, # what function are we taking the derivative of?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.?
X = X_c) # what other values are needed?
X = X_c) # what other values are needed?
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# ---- compute the ev given X_c (w/ range of values) ----
# ---- compute the ev given X_c (w/ range of values) ----
# create chosen values for X
# create chosen values for X
X_c <- cbind(
X_c <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"lower" = seq(min(holland$lower), max(holland$lower), by = 1),
"lower" = seq(min(holland$lower), max(holland$lower), by = 1),
"vendors" = median(holland$vendors),
"vendors" = median(holland$vendors),
"budget" = median(holland$budget),
"budget" = median(holland$budget),
"population" = median(holland$population)
"population" = median(holland$population)
)
)
# containers for estimated quantities of interest and ses
# containers for estimated quantities of interest and ses
ev_hat <- numeric(nrow(X_c))
ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
se_ev_hat <- numeric(nrow(X_c))
# loop over each row of X_c and compute qi and se
# loop over each row of X_c and compute qi and se
for (i in 1:nrow(X_c)) { # for the ith row of X...
for (i in 1:nrow(X_c)) { # for the ith row of X...
# invariance property
# invariance property
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ])
# delta method
# delta method
grad <- grad(
grad <- grad(
func = ev_fn,
func = ev_fn,
x = fit$beta_hat,
x = fit$beta_hat,
X = X_c[i, ])
X = X_c[i, ])
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad)
}
}
# put X_c, qi estimates, and se estimates in data frame
# put X_c, qi estimates, and se estimates in data frame
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
qi <- cbind(X_c, ev_hat, se_ev_hat) |>
data.frame() |>
data.frame() |>
glimpse()
glimpse()
# plot
# plot
ggplot(qi, aes(x = lower, y = ev_hat,
ggplot(qi, aes(x = lower, y = ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymin = ev_hat - 1.64*se_ev_hat,
ymax = ev_hat + 1.64*se_ev_hat)) +
ymax = ev_hat + 1.64*se_ev_hat)) +
geom_ribbon() +
geom_ribbon() +
geom_line()
geom_line()
# ---- compute first difference ----
# ---- compute first difference ----
# make X_lo
# make X_lo
X_lo <- cbind(
X_lo <- cbind(
"constant" = 1, # intercept
"constant" = 1, # intercept
"lower" = quantile(holland$lower, probs = 0.25),
"lower" = quantile(holland$lower, probs = 0.25),
"vendors" = median(holland$vendors),
"vendors" = median(holland$vendors),
"budget" = median(holland$budget),
"budget" = median(holland$budget),
"population" = median(holland$population)
"population" = median(holland$population)
)
)
# make X_hi by modifying the relevant value of X_lo
# make X_hi by modifying the relevant value of X_lo
X_hi <- X_lo
X_hi <- X_lo
X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75)
X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75)
# function to compute first difference
# function to compute first difference
fd_fn <- function(beta, hi, lo) {
fd_fn <- function(beta, hi, lo) {
exp(hi%*%beta) - exp(lo%*%beta)
exp(hi%*%beta) - exp(lo%*%beta)
}
}
# invariance property
# invariance property
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo)
# delta method
# delta method
grad <- grad(
grad <- grad(
func = fd_fn,
func = fd_fn,
x = fit$beta_hat,
x = fit$beta_hat,
hi = X_hi,
hi = X_hi,
lo = X_lo)
lo = X_lo)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad)
# estimated fd
# estimated fd
fd_hat
fd_hat
# estimated se
# estimated se
se_fd_hat
se_fd_hat
# 90% ci
# 90% ci
fd_hat - 1.64*se_fd_hat # lower
fd_hat - 1.64*se_fd_hat # lower
fd_hat + 1.64*se_fd_hat # upper
fd_hat + 1.64*se_fd_hat # upper
저장된 비교 결과
원본
파일 열기
# ---- fit poisson model with optim() ---- # data; see ?crdata::holland2015 holland <- crdata::holland2015 |> filter(city == "santiago") # formula corresponds to model 1 for each city in holland (2015) table 2 f <- operations ~ lower + vendors + budget + population # ---- create a function to fit the model ---- # log-likelihood function poisson_ll <- function(beta, y, X) { linpred <- X%*%beta # perhaps denoted eta lambda <- exp(linpred) ll <- sum(dpois(y, lambda = lambda, log = TRUE)) return(ll) } # function to fit model est_poisson <- function(f, data) { # make X and y mf <- model.frame(f, data = data) X <- model.matrix(f, data = mf) y <- model.response(mf) # create starting values par_start <- rep(0, ncol(X)) # run optim() est <- optim(par_start, fn = poisson_ll, y = y, X = X, hessian = TRUE, # for SEs! control = list(fnscale = -1), method = "BFGS") # check convergence; print warning if not if (est$convergence != 0) print("Model did not converge!") # create list of objects to return res <- list(beta_hat = est$par, var_hat = solve(-est$hessian)) # return the list return(res) } # fit model fit <- est_poisson(f, data = holland) print(fit, digits = 2) # print estimates w/ reasonable digits # ---- compute the expected value given X_c ---- # create chosen values for X # note 1: naming columns helps a bit later # note 2: can also do with f, model.matrix(..., newdata = ...) X_c <- cbind( "constant" = 1, # intercept "lower" = median(holland$lower), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # function to compute qi ev_fn <- function(beta, X) { exp(X%*%beta) } # invariance property ev_hat <- ev_fn(fit$beta_hat, X_c) # delta method library(numDeriv) # for grad() grad <- grad( func = ev_fn, # what function are we taking the derivative of? x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.? X = X_c) # what other values are needed? se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad) # ---- compute the ev given X_c (w/ range of values) ---- # create chosen values for X X_c <- cbind( "constant" = 1, # intercept "lower" = seq(min(holland$lower), max(holland$lower), by = 1), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # containers for estimated quantities of interest and ses ev_hat <- numeric(nrow(X_c)) se_ev_hat <- numeric(nrow(X_c)) # loop over each row of X_c and compute qi and se for (i in 1:nrow(X_c)) { # for the ith row of X... # invariance property ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ]) # delta method grad <- grad( func = ev_fn, x = fit$beta_hat, X = X_c[i, ]) se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad) } # put X_c, qi estimates, and se estimates in data frame qi <- cbind(X_c, ev_hat, se_ev_hat) |> data.frame() |> glimpse() # plot ggplot(qi, aes(x = lower, y = ev_hat, ymin = ev_hat - 1.64*se_ev_hat, ymax = ev_hat + 1.64*se_ev_hat)) + geom_ribbon() + geom_line() # ---- compute first difference ---- # make X_lo X_lo <- cbind( "constant" = 1, # intercept "lower" = quantile(holland$lower, probs = 0.25), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # make X_hi by modifying the relevant value of X_lo X_hi <- X_lo X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75) # function to compute first difference fd_fn <- function(beta, hi, lo) { exp(hi%*%beta) - exp(lo%*%beta) } # invariance property fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo) # delta method grad <- grad( func = fd_fn, x = fit$beta_hat, hi = X_hi, lo = X_lo) se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad) # estimated fd fd_hat # estimated se se_fd_hat # 90% ci fd_hat - 1.64*se_fd_hat # lower fd_hat + 1.64*se_fd_hat # upper
수정본
파일 열기
# ---- fit negative binomial model with optim() ---- # data; see ?crdata::holland2015 holland <- crdata::holland2015 |> filter(city == "santiago") # formula corresponds to model 1 for each city in holland (2015) table 2 f <- operations ~ lower + vendors + budget + population # ---- create a function to fit the model ---- # log-likelihood function (NB2: Var(y|X)=mu + mu^2/theta) nb_ll <- function(par, y, X) { beta <- par[1:ncol(X)] theta <- par[ncol(X) + 1] linpred <- X%*%beta # perhaps denoted eta mu <- exp(linpred) ll <- sum(dnbinom(y, mu = mu, size = theta, log = TRUE)) return(ll) } # function to fit model est_nb <- function(f, data) { # make X and y mf <- model.frame(f, data = data) X <- model.matrix(f, data = mf) y <- model.response(mf) # create starting values par_start <- c(rep(0, ncol(X)), 1) # append theta start (>0) # run optim() est <- optim(par_start, fn = nb_ll, y = y, X = X, hessian = TRUE, # for SEs! control = list(fnscale = -1), method = "BFGS") # check convergence; print warning if not if (est$convergence != 0) print("Model did not converge!") # create list of objects to return k <- ncol(X) res <- list(beta_hat = est$par[1:k], var_hat = solve(-est$hessian)[1:k, 1:k], theta_hat = est$par[k + 1], var_theta_hat = solve(-est$hessian)[k + 1, k + 1]) # return the list return(res) } # fit model fit <- est_poisson(f, data = holland) print(fit, digits = 2) # print estimates w/ reasonable digits # ---- compute the expected value given X_c ---- # create chosen values for X # note 1: naming columns helps a bit later # note 2: can also do with f, model.matrix(..., newdata = ...) X_c <- cbind( "constant" = 1, # intercept "lower" = median(holland$lower), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # function to compute qi ev_fn <- function(beta, X) { exp(X%*%beta) } # invariance property ev_hat <- ev_fn(fit$beta_hat, X_c) # delta method library(numDeriv) # for grad() grad <- grad( func = ev_fn, # what function are we taking the derivative of? x = fit$beta_hat, # what variable(s) are we taking the derivative w.r.t.? X = X_c) # what other values are needed? se_ev_hat <- sqrt(grad %*% fit$var_hat %*% grad) # ---- compute the ev given X_c (w/ range of values) ---- # create chosen values for X X_c <- cbind( "constant" = 1, # intercept "lower" = seq(min(holland$lower), max(holland$lower), by = 1), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # containers for estimated quantities of interest and ses ev_hat <- numeric(nrow(X_c)) se_ev_hat <- numeric(nrow(X_c)) # loop over each row of X_c and compute qi and se for (i in 1:nrow(X_c)) { # for the ith row of X... # invariance property ev_hat[i] <- ev_fn(fit$beta_hat, X_c[i, ]) # delta method grad <- grad( func = ev_fn, x = fit$beta_hat, X = X_c[i, ]) se_ev_hat[i] <- sqrt(grad %*% fit$var_hat %*% grad) } # put X_c, qi estimates, and se estimates in data frame qi <- cbind(X_c, ev_hat, se_ev_hat) |> data.frame() |> glimpse() # plot ggplot(qi, aes(x = lower, y = ev_hat, ymin = ev_hat - 1.64*se_ev_hat, ymax = ev_hat + 1.64*se_ev_hat)) + geom_ribbon() + geom_line() # ---- compute first difference ---- # make X_lo X_lo <- cbind( "constant" = 1, # intercept "lower" = quantile(holland$lower, probs = 0.25), "vendors" = median(holland$vendors), "budget" = median(holland$budget), "population" = median(holland$population) ) # make X_hi by modifying the relevant value of X_lo X_hi <- X_lo X_hi[, "lower"] <- quantile(holland$lower, probs = 0.75) # function to compute first difference fd_fn <- function(beta, hi, lo) { exp(hi%*%beta) - exp(lo%*%beta) } # invariance property fd_hat <- fd_fn(fit$beta_hat, X_hi, X_lo) # delta method grad <- grad( func = fd_fn, x = fit$beta_hat, hi = X_hi, lo = X_lo) se_fd_hat <- sqrt(grad %*% fit$var_hat %*% grad) # estimated fd fd_hat # estimated se se_fd_hat # 90% ci fd_hat - 1.64*se_fd_hat # lower fd_hat + 1.64*se_fd_hat # upper
비교하기