<-glm(vs~mpg+hp,mtcars,family='binomial') fit
Definition
Rather than being drawn from a normal distribution (where \(\mu=\beta X\)) like in linear regression, the outcome variable in logistic regression is drawn from a Bernouilli.
\[p(x)={\frac {1}{1+e^{-(\beta _{0}+\beta _{1}x)}}}\] The logit function, or the log of the odds is defined as \[logit\:p(x)=\ln \left({\frac {p(x)}{1-p(x)}}\right)=\beta _{0}+\beta _{1}x\]
The likelihood (the probability of observing the data) is \[L=\prod _{k:y_{k}=1}p_{k}\,\prod _{k:y_{k}=0}(1-p_{k})\]
The log likelihood is
\[\ell =\sum _{k:y_{k}=1}\ln(p_{k})+\sum _{k:y_{k}=0}\ln(1-p_{k})\]
Estimation
For example, in R,
\[p(vs=1) \sim Bernouilli(logit(\beta0+\beta1*mpg+\beta2*hp))\]
Optimizing:
<- rnorm(3)
pars_init <- mtcars$vs
y <- as.matrix(cbind(1, mtcars[, c("mpg", "hp")]))
X <- function(pars, y, X) {
to_optim <- pars
betas <- X %*% betas
t <- plogis(t)
p <- sum(log(p[y == 1] + 1e-10))
ll1 <- sum(log(1 - p[y == 0] + 1e-10))
ll0 return(-(ll1 + ll0))
}coef(fit)
(Intercept) mpg hp
9.53119467 -0.03385354 -0.07233547
optim(pars_init, to_optim, y = y, X = X,method = "BFGS")$par
[1] 3.6854603 79.1027200 0.9586848
It doesn’t work. Looks like it’s an optimization problem.
Scaling fixes it.
library(tidyverse)
<- as.matrix(cbind(1, mtcars[, c("mpg", "hp")]%>%mutate_all(scale)))
X <-glm(vs~scale(mpg)+scale(hp),mtcars,family='binomial')
fit_s<- optim(pars_init, to_optim, y = y, X = X,method = "BFGS",hessian = TRUE)
fit_optim <- solve(fit_optim$hessian)
cov_beta_hat <- sqrt(diag(cov_beta_hat))
se_beta_hat coef(summary(fit_s))[,1:2]
Estimate Std. Error
(Intercept) -1.7596527 1.161736
scale(mpg) -0.2040335 1.090667
scale(hp) -4.9595270 2.373026
cbind(fit_optim$par,se_beta_hat)
se_beta_hat
[1,] -1.7598400 1.161868
[2,] -0.2041065 1.090686
[3,] -4.9599152 2.373276
Doing the optimization by hand with Newton Raphson
<- function(pars, y, X, max_iter=100, tol=1e-06) {
newton_raphson_se <- pars
betas for(i in 1:max_iter) {
<- plogis(X %*% betas)
p <- diag(as.vector(p * (1 - p)))
W <- t(X) %*% (y - p)
gradient <- -t(X) %*% W %*% X
hessian <- betas - solve(hessian, gradient)
betas_new if(sum(abs(betas_new - betas)) < tol) {
<- -t(X) %*% diag(as.vector(p * (1 - p))) %*% X
hessian_at_mle <- solve(-hessian_at_mle)
cov_beta <- sqrt(diag(cov_beta))
se_beta return(list(coefficients=betas_new, std_error=se_beta, covariance=cov_beta))
}<- betas_new
betas
}stop("Algorithm did not converge")
}<- rnorm(3,0,0.1)
pars_init <- newton_raphson_se(pars_init, y, X)
nr coef(summary(fit_s))[,1:2]
Estimate Std. Error
(Intercept) -1.7596527 1.161736
scale(mpg) -0.2040335 1.090667
scale(hp) -4.9595270 2.373026
cbind(nr$coefficients,nr$std_error)
[,1] [,2]
1 -1.7596527 1.161765
mpg -0.2040335 1.090672
hp -4.9595270 2.373079
Getting residuals:
<-glm(vs~mpg+hp,mtcars,family='binomial')
fitsummary(fit)
Call:
glm(formula = vs ~ mpg + hp, family = "binomial", data = mtcars)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.09164 -0.19536 -0.01377 0.50499 1.18424
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 9.53119 7.03368 1.355 0.1754
mpg -0.03385 0.18097 -0.187 0.8516
hp -0.07234 0.03461 -2.090 0.0366 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 43.860 on 31 degrees of freedom
Residual deviance: 16.803 on 29 degrees of freedom
AIC: 22.803
Number of Fisher Scoring iterations: 7
<- mtcars$vs
y <- predict(fit,type = 'response')
pred_p # deviance residuals
sign(y-pred_p) * ifelse(y==1,sqrt(-2*log(pred_p)),sqrt(-2*log(1-pred_p)))
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
-1.5590059536 -1.5590059536 0.4962884481 0.8437021366
Hornet Sportabout Valiant Duster 360 Merc 240D
-0.2144937088 0.6871394963 -0.0184808702 0.1707999694
Merc 230 Merc 280 Merc 280C Merc 450SE
0.5310868376 1.1842357578 1.1641307770 -0.1863798530
Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
-0.1835858307 -0.1901703014 -0.0838160203 -0.0584048409
Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
-0.0315760682 0.2253974400 0.1318746631 0.2230111955
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
0.5565252682 -0.5419825955 -0.5445474113 -0.0187963265
Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
-0.2127064039 0.2069627690 -2.0916418435 1.0416111572
Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
-0.0090629212 -0.2109336591 -0.0007045712 0.8182883341
resid(fit)
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
-1.5590059536 -1.5590059536 0.4962884481 0.8437021366
Hornet Sportabout Valiant Duster 360 Merc 240D
-0.2144937088 0.6871394963 -0.0184808702 0.1707999694
Merc 230 Merc 280 Merc 280C Merc 450SE
0.5310868376 1.1842357578 1.1641307770 -0.1863798530
Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
-0.1835858307 -0.1901703014 -0.0838160203 -0.0584048409
Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
-0.0315760682 0.2253974400 0.1318746631 0.2230111955
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
0.5565252682 -0.5419825955 -0.5445474113 -0.0187963265
Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
-0.2127064039 0.2069627690 -2.0916418435 1.0416111572
Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
-0.0090629212 -0.2109336591 -0.0007045712 0.8182883341
# working residuals
$residuals fit
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
-3.371136 -3.371136 1.131055 1.427489
Hornet Sportabout Valiant Duster 360 Merc 240D
-1.023270 1.266276 -1.000171 1.014693
Merc 230 Merc 280 Merc 280C Merc 450SE
1.151455 2.016185 1.969147 -1.017520
Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
-1.016995 -1.018247 -1.003519 -1.001707
Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
-1.000499 1.025727 1.008733 1.025179
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
1.167495 -1.158206 -1.159821 -1.000177
Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
-1.022880 1.021648 -8.912750 1.720263
Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
-1.000041 -1.022496 -1.000000 1.397658
-pred_p) / (pred_p*(1-pred_p)) (y
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
-3.371136 -3.371136 1.131055 1.427489
Hornet Sportabout Valiant Duster 360 Merc 240D
-1.023270 1.266276 -1.000171 1.014693
Merc 230 Merc 280 Merc 280C Merc 450SE
1.151455 2.016185 1.969147 -1.017520
Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
-1.016995 -1.018247 -1.003519 -1.001707
Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
-1.000499 1.025727 1.008733 1.025179
Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
1.167495 -1.158206 -1.159821 -1.000177
Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
-1.022880 1.021648 -8.912750 1.720263
Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
-1.000041 -1.022496 -1.000000 1.397658
quantile(resid(fit),c(0,0.25,0.5,0.75,1))
0% 25% 50% 75% 100%
-2.0916418 -0.1953611 -0.0137719 0.5049880 1.1842358
Getting deviance:
# Residual deviance
sum(resid(fit))
[1] 0.5657591
# Null deviance is residual deviance from Null model
sum(resid(glm(vs~1,mtcars,family='binomial'))^2)
[1] 43.86011
Getting log lik:
logLik(fit)
'log Lik.' -8.401486 (df=3)
log(prod(ifelse(y,pred_p,1-pred_p)))
[1] -8.401486
Getting AIC:
AIC = 2*3 - 2*logLik(fit)) (
'log Lik.' 22.80297 (df=3)
<- as.matrix(cbind(1,fit$model[,-1]))
Xp <- coef(fit)
b <- Xp %*% b
yh <- vcov(fit)
V <- rowSums((Xp %*% V) * Xp)
var.fit sqrt(var.fit)[1:10]
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
0.6500130 0.6500130 0.8835895 0.6366855
Hornet Sportabout Valiant Duster 360 Merc 240D
1.9371305 1.0340190 3.9245037 1.6766014
Merc 230 Merc 280
0.8386182 0.6885804
predict(fit,se.fit=TRUE)$se.fit[1:10]
Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
0.6500130 0.6500130 0.8835895 0.6366855
Hornet Sportabout Valiant Duster 360 Merc 240D
1.9371305 1.0340190 3.9245037 1.6766014
Merc 230 Merc 280
0.8386182 0.6885804
Confidence intervals:
<- predict(fit,se.fit=TRUE)
predict_fit <- 1.96 ## approx 95% CI
critval <- predict_fit$fit + (critval * predict_fit$se.fit)
upr <- predict_fit$fit - (critval * predict_fit$se.fit)
lwr <- predict_fit$fit
estimate <- fit$family$linkinv(estimate)
estimate2 <- fit$family$linkinv(upr)
upr2 <- fit$family$linkinv(lwr) lwr2
With logistic regression, there’s no possibility to create a predictive interval because it would be [0,1] if the probability is between 0.05 and 0.95 and [0,0]/[1,1] otherwise.