# data from Long (1997)
# see http://data.princeton.edu/wws509/stata/overdispersion.html for Stata analysis
hist(PhdPubs$articles, breaks=0:19, col="pink", xlim=c(0,20),
xlab="Number of Articles")
Fit the poisson model
phd.pois <- glm(articles ~ ., data=PhdPubs, family=poisson)
library(car) # Companion to Applied Regression # Companion to Applied Regression
Anova(phd.pois)
## Analysis of Deviance Table (Type II tests)
##
## Response: articles
## LR Chisq Df Pr(>Chisq)
## female 17.1 1 3.6e-05 ***
## married 6.6 1 0.01 *
## kid5 22.1 1 2.6e-06 ***
## phdprestige 1.0 1 0.32
## mentor 126.8 1 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
round(cbind(beta = coef(phd.pois),
expbeta = exp(coef(phd.pois)),
pct = 100 * (exp(coef(phd.pois)) - 1)), 3)
## beta expbeta pct
## (Intercept) 0.266 1.304 30.43
## female -0.224 0.799 -20.10
## married 0.157 1.170 17.04
## kid5 -0.185 0.831 -16.88
## phdprestige 0.025 1.026 2.57
## mentor 0.025 1.026 2.56
phd.pois1 <- update(phd.pois, . ~ .^2)
Anova(phd.pois1)
## Analysis of Deviance Table (Type II tests)
##
## Response: articles
## LR Chisq Df Pr(>Chisq)
## female 14.5 1 0.00014 ***
## married 6.2 1 0.01277 *
## kid5 19.5 1 9.8e-06 ***
## phdprestige 1.0 1 0.32655
## mentor 128.1 1 < 2e-16 ***
## female:married 0.3 1 0.60995
## female:kid5 0.1 1 0.72929
## female:phdprestige 0.2 1 0.63574
## female:mentor 0.0 1 0.91260
## married:kid5 0
## married:phdprestige 1.7 1 0.19153
## married:mentor 1.2 1 0.28203
## kid5:phdprestige 0.2 1 0.68523
## kid5:mentor 2.8 1 0.09290 .
## phdprestige:mentor 3.8 1 0.05094 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(phd.pois, phd.pois1, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: articles ~ female + married + kid5 + phdprestige + mentor
## Model 2: articles ~ female + married + kid5 + phdprestige + mentor + female:married +
## female:kid5 + female:phdprestige + female:mentor + married:kid5 +
## married:phdprestige + married:mentor + kid5:phdprestige +
## kid5:mentor + phdprestige:mentor
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 909 1634
## 2 900 1618 9 15.2 0.086 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
LRstats(phd.pois, phd.pois1)
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313 3342 1634 909 <2e-16 ***
## phd.pois1 3316 3388 1618 900 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## model diagnostics
op <- par(mfrow=c(1,2), mar=c(4,4,2,1)+.1)
plot(phd.pois, which=c(1,5))
par(op)
### component-plus-residual plot for non-linearity
crPlot(phd.pois, "mentor", pch=16, lwd=4, id = list(n=2))
## [1] 913 911 328 803
op <- par(mar=c(4,4,2,1)+.1)
crPlot(phd.pois, "mentor", pch=16, lwd=4, id = list(n=2),
cex.lab=1.4, xlab="mentor publications" )
## [1] 913 911 328 803
text(70, 2.5, paste("b =",round( coef(phd.pois)["mentor"], 3)), col="red", cex=1.5)
par(op)
### Influence plot
influencePlot(phd.pois, id = list(n=2))
## StudRes Hat CookD
## 328 -3.73 0.19336 0.3676
## 803 -1.03 0.08696 0.0149
## 913 5.38 0.00356 0.0434
## 914 5.54 0.01000 0.1098
## 915 5.15 0.03550 0.2833
library(effects) # Effect Displays for Linear, Generalized Linear, and Other Models
plot(allEffects(phd.pois))
mean and variance
with(PhdPubs, c(mean=mean(articles),
var=var(articles),
ratio=var(articles)/mean(articles)))
## mean var ratio
## 1.69 3.71 2.19
estimate of phi = dispersion parameter
with(phd.pois, deviance/df.residual)
## [1] 1.8
sum(residuals(phd.pois, type = "pearson")^2)/phd.pois$df.residual
## [1] 1.83
Fit the quasi-poisson model
phd.qpois <- glm(articles ~ ., data=PhdPubs, family=quasipoisson)
Fit the negative-binomial model
phd.nbin <- glm.nb(articles ~ ., data=PhdPubs)
Estimate of theta
summary(phd.nbin)$theta
## [1] 2.27
## compare models
LRstats(phd.pois, phd.qpois, phd.nbin)
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313 3342 1634 909 <2e-16 ***
## phd.qpois 909
## phd.nbin 3135 3169 1004 909 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## compare standard errors
phd.SE <- sqrt(cbind(
pois = diag(vcov(phd.pois)),
qpois = diag(vcov(phd.qpois)),
nbin = diag(vcov(phd.nbin))))
round(phd.SE, 4)
## pois qpois nbin
## (Intercept) 0.0996 0.1348 0.1327
## female 0.0546 0.0738 0.0726
## married 0.0613 0.0829 0.0819
## kid5 0.0401 0.0543 0.0528
## phdprestige 0.0253 0.0342 0.0343
## mentor 0.0020 0.0027 0.0032
phd.coef <- cbind(
pois = coef(phd.pois),
qpois = coef(phd.qpois),
nbin = coef(phd.nbin))
round(phd.coef,3)
## pois qpois nbin
## (Intercept) 0.266 0.266 0.213
## female -0.224 -0.224 -0.216
## married 0.157 0.157 0.153
## kid5 -0.185 -0.185 -0.176
## phdprestige 0.025 0.025 0.029
## mentor 0.025 0.025 0.029
round(cbind(beta = coef(phd.nbin),
expbeta = exp(coef(phd.nbin)),
pct = 100 * (exp(coef(phd.nbin)) - 1)), 3)
## beta expbeta pct
## (Intercept) 0.213 1.237 23.73
## female -0.216 0.806 -19.45
## married 0.153 1.165 16.51
## kid5 -0.176 0.838 -16.17
## phdprestige 0.029 1.030 2.98
## mentor 0.029 1.029 2.91
if (!require(countreg)) install.packages("countreg", repos="http://R-Forge.R-project.org")
# library(countreg) # Count Data Regression
plot(factor(articles==0) ~ mentor, data=PhdPubs,
ylevels=2:1, ylab="Zero articles",
breaks=quantile(mentor, probs=seq(0,1,.2)), cex.lab=1.25)
Fit ZI and hurdle models
phd.zip <- zeroinfl(articles ~ ., data=PhdPubs, dist="poisson")
phd.znb <- zeroinfl(articles ~ ., data=PhdPubs, dist="negbin")
phd.hp <- hurdle(articles ~ ., data=PhdPubs, dist="poisson")
phd.hnb <- hurdle(articles ~ ., data=PhdPubs, dist="negbin")
show coefficients
phd.zip
##
## Call:
## zeroinfl(formula = articles ~ ., data = PhdPubs, dist = "poisson")
##
## Count model coefficients (poisson with log link):
## (Intercept) female married kid5 phdprestige mentor
## 0.5992 -0.2088 0.1062 -0.1427 0.0070 0.0178
##
## Zero-inflation model coefficients (binomial with logit link):
## (Intercept) female married kid5 phdprestige mentor
## -0.56332 0.10816 -0.35558 0.21974 -0.00537 -0.13313
phd.hp
##
## Call:
## hurdle(formula = articles ~ ., data = PhdPubs, dist = "poisson")
##
## Count model coefficients (truncated poisson with log link):
## (Intercept) female married kid5 phdprestige mentor
## 0.63938 -0.22824 0.09899 -0.14173 -0.00279 0.01859
##
## Zero hurdle model coefficients (binomial with logit link):
## (Intercept) female married kid5 phdprestige mentor
## 0.1784 -0.2508 0.3277 -0.2854 0.0428 0.0792
LRstats(phd.pois, phd.nbin, phd.zip, phd.znb, phd.hp, phd.hnb,
sortby="BIC")
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313 3342 3301 909 <2e-16 ***
## phd.hp 3235 3292 3211 903 <2e-16 ***
## phd.zip 3234 3291 3210 903 <2e-16 ***
## phd.hnb 3131 3194 3105 902 <2e-16 ***
## phd.znb 3126 3188 3100 902 <2e-16 ***
## phd.nbin 3135 3169 3121 909 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
NB: only mentor is important in the zero model
coeftest(phd.zip)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## count_(Intercept) 0.59918 0.11861 5.05 5.3e-07 ***
## count_female -0.20879 0.06353 -3.29 0.0011 **
## count_married 0.10623 0.07097 1.50 0.1348
## count_kid5 -0.14271 0.04744 -3.01 0.0027 **
## count_phdprestige 0.00700 0.02981 0.23 0.8145
## count_mentor 0.01785 0.00233 7.65 5.3e-14 ***
## zero_(Intercept) -0.56332 0.49405 -1.14 0.2545
## zero_female 0.10816 0.28173 0.38 0.7011
## zero_married -0.35558 0.31796 -1.12 0.2637
## zero_kid5 0.21974 0.19658 1.12 0.2639
## zero_phdprestige -0.00537 0.14118 -0.04 0.9697
## zero_mentor -0.13313 0.04643 -2.87 0.0042 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(phd.znb)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## count_(Intercept) 0.3655 0.1399 2.61 0.0091 **
## count_female -0.1946 0.0758 -2.57 0.0104 *
## count_married 0.1015 0.0843 1.20 0.2291
## count_kid5 -0.1516 0.0542 -2.80 0.0053 **
## count_phdprestige 0.0156 0.0345 0.45 0.6511
## count_mentor 0.0244 0.0035 6.97 5.9e-12 ***
## zero_(Intercept) -0.2943 1.2827 -0.23 0.8186
## zero_female 0.6580 0.8557 0.77 0.4421
## zero_married -1.4741 0.9176 -1.61 0.1085
## zero_kid5 0.6241 0.4410 1.42 0.1574
## zero_phdprestige -0.0113 0.2888 -0.04 0.9689
## zero_mentor -0.8765 0.3152 -2.78 0.0055 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
phd.zip1 <- zeroinfl(articles ~ .| mentor, data=PhdPubs, dist="poisson")
phd.znb1 <- zeroinfl(articles ~ .| mentor, data=PhdPubs, dist="negbin")
summary(phd.zip1)
##
## Call:
## zeroinfl(formula = articles ~ . | mentor, data = PhdPubs, dist = "poisson")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -2.295 -0.871 -0.262 0.540 7.243
##
## Count model coefficients (poisson with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.58819 0.10996 5.35 8.8e-08 ***
## female -0.21791 0.05879 -3.71 0.00021 ***
## married 0.13597 0.06602 2.06 0.03945 *
## kid5 -0.16262 0.04338 -3.75 0.00018 ***
## phdprestige 0.00671 0.02729 0.25 0.80574
## mentor 0.01804 0.00229 7.88 3.3e-15 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6865 0.2059 -3.33 0.00086 ***
## mentor -0.1303 0.0404 -3.23 0.00125 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of iterations in BFGS optimization: 16
## Log-likelihood: -1.61e+03 on 8 Df
LRstats(phd.pois, phd.nbin, phd.zip, phd.znb, phd.hp, phd.hnb, phd.zip1, phd.znb1,
sortby="BIC")
## Likelihood summary table:
## AIC BIC LR Chisq Df Pr(>Chisq)
## phd.pois 3313 3342 3301 909 <2e-16 ***
## phd.hp 3235 3292 3211 903 <2e-16 ***
## phd.zip 3234 3291 3210 903 <2e-16 ***
## phd.zip1 3227 3266 3211 907 <2e-16 ***
## phd.hnb 3131 3194 3105 902 <2e-16 ***
## phd.znb 3126 3188 3100 902 <2e-16 ***
## phd.nbin 3135 3169 3121 909 <2e-16 ***
## phd.znb1 3124 3168 3106 906 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(allEffects(phd.nbin)[c(1,3,5)], rows=1, cols=3)
plot the zero model
phd.zero <- glm((articles==0) ~ mentor, data=PhdPubs, family=binomial)
plot(allEffects(phd.zero), main="Mentor effect on not publishing")