Load packages
library(car) # for Anova()
library(ggplot2)
library(effects) # effect plots
Load the data
data(UCBAdmissions)
berkeley <- as.data.frame(UCBAdmissions)
Logit model for Dept
berk.logit1 <- glm(Admit=="Admitted" ~ Dept, data=berkeley,
weights=Freq, family="binomial")
Anova(berk.logit1, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 623 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(berk.logit1)
##
## Call:
## glm(formula = Admit == "Admitted" ~ Dept, family = "binomial",
## data = berkeley, weights = Freq)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -25.433 -13.203 -0.028 15.919 21.222
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5935 0.0684 8.68 <2e-16 ***
## DeptB -0.0506 0.1097 -0.46 0.64
## DeptC -1.2091 0.0973 -12.43 <2e-16 ***
## DeptD -1.2583 0.1015 -12.40 <2e-16 ***
## DeptE -1.6830 0.1173 -14.34 <2e-16 ***
## DeptF -3.2691 0.1671 -19.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6044.3 on 23 degrees of freedom
## Residual deviance: 5189.0 on 18 degrees of freedom
## AIC: 5201
##
## Number of Fisher Scoring iterations: 6
Main effects model
berk.logit2 <- glm(Admit=="Admitted" ~ Dept+Gender,
data=berkeley, weights=Freq, family="binomial")
Anova(berk.logit2, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 534.71 <2e-16 ***
## Gender 1 1.53 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(berk.logit2)
##
## Call:
## glm(formula = Admit == "Admitted" ~ Dept + Gender, family = "binomial",
## data = berkeley, weights = Freq)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -25.342 -13.058 -0.163 16.017 21.320
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.5821 0.0690 8.44 <2e-16 ***
## DeptB -0.0434 0.1098 -0.40 0.69
## DeptC -1.2626 0.1066 -11.84 <2e-16 ***
## DeptD -1.2946 0.1058 -12.23 <2e-16 ***
## DeptE -1.7393 0.1261 -13.79 <2e-16 ***
## DeptF -3.3065 0.1700 -19.45 <2e-16 ***
## GenderFemale 0.0999 0.0808 1.24 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6044.3 on 23 degrees of freedom
## Residual deviance: 5187.5 on 17 degrees of freedom
## AIC: 5201
##
## Number of Fisher Scoring iterations: 6
Plots for logit models
Calculate the observed and fitted log odds
obs <- log(UCBAdmissions[1,,] / UCBAdmissions[2,,])
pred2 <- cbind(berkeley[,1:3], fit=predict(berk.logit2))
pred2 <- cbind(subset(pred2, Admit=="Admitted"), obs=as.vector(obs))
head(pred2)
## Admit Gender Dept fit obs
## 1 Admitted Male A 0.582 0.492
## 3 Admitted Female A 0.682 1.544
## 5 Admitted Male B 0.539 0.534
## 7 Admitted Female B 0.639 0.754
## 9 Admitted Male C -0.681 -0.536
## 11 Admitted Female C -0.581 -0.660
ggplot(pred2, aes(x=Dept, y=fit, group=Gender, color=Gender)) +
geom_line(linewidth = 1.4) +
geom_point(aes(y=obs), size = 3) +
labs(y="Log odds (Admitted") +
theme_bw(base_size = 16) +
theme(legend.position = c(.75, .80))
Effect plots
berk.eff2 <- allEffects(berk.logit2)
plot main effects
plot(berk.eff2)
# plot 'interaction'
plot(effect('Dept:Gender', berk.logit2), multiline=TRUE)
include a 1 df term for dept A
berkeley <- within(berkeley, dept1AG <- (Dept=='A')*(Gender=='Female'))
berk.logit3 <- glm(Admit=="Admitted" ~ Dept + Gender + dept1AG,
data=berkeley, weights=Freq, family="binomial")
Anova(berk.logit3, test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: Admit == "Admitted"
## Df Chisq Pr(>Chisq)
## Dept 5 445.12 < 2e-16 ***
## Gender 1 0.13 0.72
## dept1AG 1 15.32 9.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(effect('Dept:Gender', berk.logit3), multiline=TRUE)
obs <- log(UCBAdmissions[1,,] / UCBAdmissions[2,,])
pred3 <- cbind(berkeley[,1:3], fit=predict(berk.logit3))
pred3 <- cbind(subset(pred3, Admit=="Admitted"), obs=as.vector(obs))
head(pred3)
## Admit Gender Dept fit obs
## 1 Admitted Male A 0.492 0.492
## 3 Admitted Female A 1.544 1.544
## 5 Admitted Male B 0.544 0.534
## 7 Admitted Female B 0.513 0.754
## 9 Admitted Male C -0.596 -0.536
## 11 Admitted Female C -0.627 -0.660
ggplot(pred3, aes(x=Dept, y=fit, group=Gender, color=Gender)) +
geom_line(size=1.4) +
geom_point(aes(y=obs), size=3) +
labs(y="Log odds (Admitted") +
theme_bw(base_size = 16) +
theme(legend.position = c(.75, .80))
IycgLS0tDQojJyB0aXRsZTogIlVDQiBkYXRhOiBsb2dpdCBtb2RlbHMgZm9yIGFkbWlzc2lvbiINCiMnIGF1dGhvcjogIk1pY2hhZWwgRnJpZW5kbHkiDQojJyBkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCkpYCINCiMnIG91dHB1dDoNCiMnICAgaHRtbF9kb2N1bWVudDoNCiMnICAgICB0aGVtZTogcmVhZGFibGUNCiMnICAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQojJyAtLS0NCg0KIysgZWNobz1GQUxTRQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KA0KICB3YXJuaW5nID0gRkFMU0UsICAgIyBhdm9pZCB3YXJuaW5ncyBhbmQgbWVzc2FnZXMgaW4gdGhlIG91dHB1dA0KICBtZXNzYWdlID0gRkFMU0UNCikNCg0KIycgIyMgTG9hZCBwYWNrYWdlcw0KbGlicmFyeShjYXIpICAgICAgICMgZm9yIEFub3ZhKCkNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZWZmZWN0cykgICAjIGVmZmVjdCBwbG90cw0KDQojJyAjIyBMb2FkIHRoZSBkYXRhDQpkYXRhKFVDQkFkbWlzc2lvbnMpDQpiZXJrZWxleSA8LSBhcy5kYXRhLmZyYW1lKFVDQkFkbWlzc2lvbnMpDQoNCiMnICMjIExvZ2l0IG1vZGVsIGZvciBEZXB0DQpiZXJrLmxvZ2l0MSA8LSBnbG0oQWRtaXQ9PSJBZG1pdHRlZCIgfiBEZXB0LCBkYXRhPWJlcmtlbGV5LCANCiAgICAgICAgICAgICAgICAgd2VpZ2h0cz1GcmVxLCBmYW1pbHk9ImJpbm9taWFsIikNCkFub3ZhKGJlcmsubG9naXQxLCB0ZXN0PSJXYWxkIikNCnN1bW1hcnkoYmVyay5sb2dpdDEpDQoNCiMnICMjIE1haW4gZWZmZWN0cyBtb2RlbA0KYmVyay5sb2dpdDIgPC0gZ2xtKEFkbWl0PT0iQWRtaXR0ZWQiIH4gRGVwdCtHZW5kZXIsIA0KICAgICAgICAgICAgICAgICBkYXRhPWJlcmtlbGV5LCB3ZWlnaHRzPUZyZXEsIGZhbWlseT0iYmlub21pYWwiKQ0KQW5vdmEoYmVyay5sb2dpdDIsIHRlc3Q9IldhbGQiKQ0Kc3VtbWFyeShiZXJrLmxvZ2l0MikNCg0KIycgIyMgUGxvdHMgZm9yIGxvZ2l0IG1vZGVscw0KIycgQ2FsY3VsYXRlIHRoZSBvYnNlcnZlZCBhbmQgZml0dGVkIGxvZyBvZGRzDQpvYnMgPC0gbG9nKFVDQkFkbWlzc2lvbnNbMSwsXSAvIFVDQkFkbWlzc2lvbnNbMiwsXSkNCnByZWQyIDwtIGNiaW5kKGJlcmtlbGV5WywxOjNdLCBmaXQ9cHJlZGljdChiZXJrLmxvZ2l0MikpDQpwcmVkMiA8LSBjYmluZChzdWJzZXQocHJlZDIsIEFkbWl0PT0iQWRtaXR0ZWQiKSwgb2JzPWFzLnZlY3RvcihvYnMpKQ0KaGVhZChwcmVkMikNCg0KZ2dwbG90KHByZWQyLCBhZXMoeD1EZXB0LCB5PWZpdCwgZ3JvdXA9R2VuZGVyLCBjb2xvcj1HZW5kZXIpKSArDQogIGdlb21fbGluZShsaW5ld2lkdGggPSAxLjQpICsNCiAgZ2VvbV9wb2ludChhZXMoeT1vYnMpLCBzaXplID0gMykgKw0KICBsYWJzKHk9IkxvZyBvZGRzIChBZG1pdHRlZCIpICsNCiAgdGhlbWVfYncoYmFzZV9zaXplID0gMTYpICsNCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gYyguNzUsIC44MCkpDQoNCg0KIycgIyMgRWZmZWN0IHBsb3RzDQpiZXJrLmVmZjIgPC0gYWxsRWZmZWN0cyhiZXJrLmxvZ2l0MikNCg0KIycgcGxvdCBtYWluICBlZmZlY3RzDQpwbG90KGJlcmsuZWZmMikNCiMgcGxvdCAnaW50ZXJhY3Rpb24nDQpwbG90KGVmZmVjdCgnRGVwdDpHZW5kZXInLCBiZXJrLmxvZ2l0MiksIG11bHRpbGluZT1UUlVFKQ0KDQojJyAjIyBpbmNsdWRlIGEgMSBkZiB0ZXJtIGZvciBkZXB0IEENCmJlcmtlbGV5IDwtIHdpdGhpbihiZXJrZWxleSwgZGVwdDFBRyA8LSAoRGVwdD09J0EnKSooR2VuZGVyPT0nRmVtYWxlJykpDQpiZXJrLmxvZ2l0MyA8LSBnbG0oQWRtaXQ9PSJBZG1pdHRlZCIgfiBEZXB0ICsgR2VuZGVyICsgZGVwdDFBRywgDQogICAgICAgICAgICAgICAgICAgZGF0YT1iZXJrZWxleSwgd2VpZ2h0cz1GcmVxLCBmYW1pbHk9ImJpbm9taWFsIikNCkFub3ZhKGJlcmsubG9naXQzLCB0ZXN0PSJXYWxkIikNCnBsb3QoZWZmZWN0KCdEZXB0OkdlbmRlcicsIGJlcmsubG9naXQzKSwgbXVsdGlsaW5lPVRSVUUpDQoNCm9icyA8LSBsb2coVUNCQWRtaXNzaW9uc1sxLCxdIC8gVUNCQWRtaXNzaW9uc1syLCxdKQ0KcHJlZDMgPC0gY2JpbmQoYmVya2VsZXlbLDE6M10sIGZpdD1wcmVkaWN0KGJlcmsubG9naXQzKSkNCnByZWQzIDwtIGNiaW5kKHN1YnNldChwcmVkMywgQWRtaXQ9PSJBZG1pdHRlZCIpLCBvYnM9YXMudmVjdG9yKG9icykpDQpoZWFkKHByZWQzKQ0KDQpnZ3Bsb3QocHJlZDMsIGFlcyh4PURlcHQsIHk9Zml0LCBncm91cD1HZW5kZXIsIGNvbG9yPUdlbmRlcikpICsNCiAgZ2VvbV9saW5lKHNpemU9MS40KSArDQogIGdlb21fcG9pbnQoYWVzKHk9b2JzKSwgc2l6ZT0zKSArDQogIGxhYnMoeT0iTG9nIG9kZHMgKEFkbWl0dGVkIikgKw0KICB0aGVtZV9idyhiYXNlX3NpemUgPSAxNikgKw0KICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSBjKC43NSwgLjgwKSkNCg0KDQo=