library(car)
library(nnet) # for `multinom()`
library(dplyr)
library(effects)
library(ggplot2)
data(Womenlf, package = "carData")
not.work
the reference categoryWomenlf <- within (Womenlf, {
partic <- ordered(partic, levels=c('not.work', 'parttime', 'fulltime'))})
wlf.multinom <- multinom(partic ~ hincome + children,
data=Womenlf, Hess=TRUE)
## # weights: 12 (6 variable)
## initial value 288.935032
## iter 10 value 211.454772
## final value 211.440963
## converged
summary(wlf.multinom, Wald=TRUE)
## Call:
## multinom(formula = partic ~ hincome + children, data = Womenlf,
## Hess = TRUE)
##
## Coefficients:
## (Intercept) hincome childrenpresent
## parttime -1.43 0.00689 0.0215
## fulltime 1.98 -0.09723 -2.5586
##
## Std. Errors:
## (Intercept) hincome childrenpresent
## parttime 0.592 0.0235 0.469
## fulltime 0.484 0.0281 0.362
##
## Value/SE (Wald statistics):
## (Intercept) hincome childrenpresent
## parttime -2.42 0.294 0.0457
## fulltime 4.10 -3.461 -7.0641
##
## Residual Deviance: 423
## AIC: 435
broom::tidy()
puts this in a more convenient format
broom::tidy(wlf.multinom)
## # A tibble: 6 x 6
## y.level term estimate std.error statistic p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 parttime (Intercept) -1.43 0.592 -2.42 1.56e- 2
## 2 parttime hincome 0.00689 0.0235 0.294 7.69e- 1
## 3 parttime childrenpresent 0.0215 0.469 0.0457 9.64e- 1
## 4 fulltime (Intercept) 1.98 0.484 4.10 4.22e- 5
## 5 fulltime hincome -0.0972 0.0281 -3.46 5.39e- 4
## 6 fulltime childrenpresent -2.56 0.362 -7.06 1.62e-12
Anova(wlf.multinom)
## Analysis of Deviance Table (Type II tests)
##
## Response: partic
## LR Chisq Df Pr(>Chisq)
## hincome 15.2 2 0.00051 ***
## children 63.6 2 1.6e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test the hypothesis that all coefficients = 0. For
linearHypothesis()
, need to construct the combinations of
the dichotomies and the predictors.
(H <- outer(c("parttime:", "fulltime:"),
c("childrenpresent", "hincome"),
paste0))
## [,1] [,2]
## [1,] "parttime:childrenpresent" "parttime:hincome"
## [2,] "fulltime:childrenpresent" "fulltime:hincome"
car::linearHypothesis(wlf.multinom, as.vector(H))
## Linear hypothesis test
##
## Hypothesis:
## parttime:childrenpresent = 0
## fulltime:childrenpresent = 0
## parttime:hincome = 0
## fulltime:hincome = 0
##
## Model 1: restricted model
## Model 2: partic ~ hincome + children
##
## Df Chisq Pr(>Chisq)
## 1
## 2 4 58.4 6.2e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(wlf.multinom)
## (Intercept) hincome childrenpresent
## parttime -1.43 0.00689 0.0215
## fulltime 1.98 -0.09723 -2.5586
exp(coef(wlf.multinom))
## (Intercept) hincome childrenpresent
## parttime 0.239 1.007 1.0217
## fulltime 7.263 0.907 0.0774
stats <- summary(wlf.multinom, Wald=TRUE)
z <- stats$Wald.ratios
p <- 2 * (1 - pnorm(abs(z)))
zapsmall(p)
## (Intercept) hincome childrenpresent
## parttime 0.016 0.769 0.964
## fulltime 0.000 0.001 0.000
op <- par(mfrow=c(1,2))
predictors <- expand.grid(hincome=1:45, children=c('absent', 'present'))
p.fit <- predict(wlf.multinom, predictors, type='probs')
Hinc <- 1:max(predictors$hincome)
for ( kids in c("absent", "present") ) {
data <- subset(data.frame(predictors, p.fit), children==kids)
plot( range(Hinc), c(0,1), type="n",
xlab="Husband's Income", ylab='Fitted Probability',
main = paste("Children", kids))
lines(Hinc, data[, 'not.work'], lwd=3, col="black", lty=1)
lines(Hinc, data[, 'parttime'], lwd=3, col="blue", lty=2)
lines(Hinc, data[, 'fulltime'], lwd=3, col="red", lty=3)
if (kids=="absent") {
legend(5, 0.97, lty=1:3, lwd=3, col=c("black", "blue", "red"),
legend=c('not working', 'part-time', 'full-time'))
}
}
par(op)
wlf.effects <- allEffects(wlf.multinom)
plot(wlf.effects, style='stacked')
plot(Effect(c("hincome", "children"), wlf.multinom),
style = "stacked", key.args=list(x=.75, y=.25),
colors = c(grey(.85), "pink", "lightblue")
)
get fitted probabilities
options(digits=3)
predictors <- expand.grid(hincome=1:50, children=c('absent', 'present'))
fit <- data.frame(predictors,
predict(wlf.multinom, predictors, type='probs'))
library(tidyr)
tidyr::gather()
plotdat <- fit |>
gather(key="Level", value="Probability", not.work:fulltime)
plotdat <- fit |>
pivot_longer(not.work:fulltime,
names_to = "Level",
values_to = "Probability")
head(plotdat)
## # A tibble: 6 x 4
## hincome children Level Probability
## <int> <fct> <chr> <dbl>
## 1 1 absent not.work 0.128
## 2 1 absent parttime 0.0307
## 3 1 absent fulltime 0.842
## 4 2 absent not.work 0.138
## 5 2 absent parttime 0.0335
## 6 2 absent fulltime 0.828
library(directlabels)
gg <-
ggplot(plotdat, aes(x = hincome, y = Probability, colour = Level)) +
geom_line(size=1.5) + theme_bw() +
facet_grid(~ children, labeller = label_both) +
theme_bw(base_size = 14)
direct.label(gg, list("top.bumptwice", dl.trans(y = y + 0.2)))