data(Abortion, package="vcdExtra")
str(Abortion)
## 'table' num [1:2, 1:2, 1:2] 171 152 138 167 79 148 112 133
## - attr(*, "dimnames")=List of 3
## ..$ Sex : chr [1:2] "Female" "Male"
## ..$ Status : chr [1:2] "Lo" "Hi"
## ..$ Support_Abortion: chr [1:2] "Yes" "No"
Support_Abortion
is the response, Sex
and
Status
are binary, nominal explanatory variables. Q: How
does support for abortion depend on sex and status?
## (b) Caesarian Births: Caesar
data(Caesar, package = "vcdExtra")
Infection
is a 3-level response variable; the
explanatory variables are Risk
(a two-level nominal
variable), whether Antibiotics
were used (a two-level
nominal variable), and whether the Caesarian section was
Planned
or not (a two-level nominal variable). Q: How
antibiotics, risk, and whether the operation was planned are associated
with infection type (or no infection)? Similarly for the other two
datasets ## Exercise 2.4: Danish Welfare
library(vcdExtra)
## Loading required package: vcd
## Loading required package: grid
## Loading required package: gnm
data(DanishWelfare, package="vcdExtra")
## Warning in data(DanishWelfare, package = "vcdExtra"): data set 'DanishWelfare'
## not found
str(DanishWelfare)
## 'data.frame': 180 obs. of 5 variables:
## $ Freq : num 1 4 1 8 6 14 8 41 100 175 ...
## $ Alcohol: Factor w/ 3 levels "<1","1-2",">2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Income : Factor w/ 4 levels "0-50","50-100",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Status : Factor w/ 3 levels "Widow","Married",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Urban : Factor w/ 5 levels "Copenhagen","SubCopenhagen",..: 1 2 3 4 5 1 2 3 4 5 ...
sum(DanishWelfare$Freq) # total cases
## [1] 5144
levels(DanishWelfare$Alcohol)
## [1] "<1" "1-2" ">2"
DanishWelfare$Alcohol <- ordered(DanishWelfare$Alcohol, levels=c("<1", "1-2", ">2"))
levels(DanishWelfare$Income)
## [1] "0-50" "50-100" "100-150" ">150"
DanishWelfare$Income <- ordered(DanishWelfare$Income, levels=c("0-50","50-100","100-150",">150"))
or, simpler, using as.ordered():
DanishWelfare$Alcohol <- as.ordered(DanishWelfare$Alcohol)
DanishWelfare$Income <- as.ordered(DanishWelfare$Income)
now they are ordered factors
str(DanishWelfare)
## 'data.frame': 180 obs. of 5 variables:
## $ Freq : num 1 4 1 8 6 14 8 41 100 175 ...
## $ Alcohol: Ord.factor w/ 3 levels "<1"<"1-2"<">2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Income : Ord.factor w/ 4 levels "0-50"<"50-100"<..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Status : Factor w/ 3 levels "Widow","Married",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Urban : Factor w/ 5 levels "Copenhagen","SubCopenhagen",..: 1 2 3 4 5 1 2 3 4 5 ...
Danish.tab <- xtabs(DanishWelfare)
# same, using formula method
Danish.tab <- xtabs(Freq ~ ., data=DanishWelfare)
str(Danish.tab)
## 'xtabs' num [1:3, 1:4, 1:3, 1:5] 1 3 2 8 1 3 2 5 2 42 ...
## - attr(*, "dimnames")=List of 4
## ..$ Alcohol: chr [1:3] "<1" "1-2" ">2"
## ..$ Income : chr [1:4] "0-50" "50-100" "100-150" ">150"
## ..$ Status : chr [1:3] "Widow" "Married" "Unmarried"
## ..$ Urban : chr [1:5] "Copenhagen" "SubCopenhagen" "LargeCity" "City" ...
## - attr(*, "call")= language xtabs(formula = Freq ~ ., data = DanishWelfare)
structable(Danish.tab)
## Income 0-50 50-100 100-150 >150
## Urban Copenhagen SubCopenhagen LargeCity City Country Copenhagen SubCopenhagen LargeCity City Country Copenhagen SubCopenhagen LargeCity City Country Copenhagen SubCopenhagen LargeCity City Country
## Alcohol Status
## <1 Widow 1 4 1 8 6 8 2 7 14 5 2 3 1 5 2 42 29 17 95 46
## Married 14 8 41 100 175 42 51 62 234 255 21 30 23 87 77 24 30 50 167 232
## Unmarried 6 1 2 6 9 7 5 9 20 27 3 2 1 12 4 33 24 15 64 68
## 1-2 Widow 3 0 1 4 2 1 1 3 8 4 5 4 1 9 4 26 34 14 48 24
## Married 15 7 15 25 48 39 59 68 172 143 32 68 43 128 86 43 76 70 198 136
## Unmarried 2 3 9 9 7 12 3 11 20 23 6 10 5 21 15 36 23 48 89 64
## >2 Widow 2 0 2 1 0 3 0 2 1 3 2 1 1 1 0 21 13 5 20 8
## Married 1 2 2 7 7 14 21 14 38 35 20 31 10 36 21 23 47 21 53 36
## Unmarried 3 0 1 5 1 2 0 3 12 13 0 2 3 9 7 38 20 13 39 26
margin.table(Danish.tab, 4)
## Urban
## Copenhagen SubCopenhagen LargeCity City Country
## 552 614 594 1765 1619
Collapse categories
Danish.tab2 <- collapse.table(Danish.tab, Urban=c("City","City","City","City","NonCity"))
structable(Danish.tab2)
## Income 0-50 50-100 100-150 >150
## Urban City NonCity City NonCity City NonCity City NonCity
## Alcohol Status
## <1 Widow 14 6 31 5 11 2 183 46
## Married 163 175 389 255 161 77 271 232
## Unmarried 15 9 41 27 18 4 136 68
## 1-2 Widow 8 2 13 4 19 4 122 24
## Married 62 48 338 143 271 86 387 136
## Unmarried 23 7 46 23 42 15 196 64
## >2 Widow 5 0 6 3 5 0 59 8
## Married 12 7 87 35 97 21 144 36
## Unmarried 9 1 17 13 14 7 110 26
structable(Alcohol ~ Urban+Status+Income, Danish.tab2)
## Alcohol <1 1-2 >2
## Urban Status Income
## City Widow 0-50 14 8 5
## 50-100 31 13 6
## 100-150 11 19 5
## >150 183 122 59
## Married 0-50 163 62 12
## 50-100 389 338 87
## 100-150 161 271 97
## >150 271 387 144
## Unmarried 0-50 15 23 9
## 50-100 41 46 17
## 100-150 18 42 14
## >150 136 196 110
## NonCity Widow 0-50 6 2 0
## 50-100 5 4 3
## 100-150 2 4 0
## >150 46 24 8
## Married 0-50 175 48 7
## 50-100 255 143 35
## 100-150 77 86 21
## >150 232 136 36
## Unmarried 0-50 9 7 1
## 50-100 27 23 13
## 100-150 4 15 7
## >150 68 64 26
## Exercise 2.5
data(UKSoccer, package="vcd")
# total games
sum(UKSoccer)
## [1] 380
# marginal totals
addmargins(UKSoccer)
## Away
## Home 0 1 2 3 4 Sum
## 0 27 29 10 8 2 76
## 1 59 53 14 12 4 142
## 2 28 32 14 12 4 90
## 3 19 14 7 4 1 45
## 4 7 8 10 2 0 27
## Sum 140 136 55 38 11 380
# or
rowSums(UKSoccer)
## 0 1 2 3 4
## 76 142 90 45 27
colSums(UKSoccer)
## 0 1 2 3 4
## 140 136 55 38 11
# or
home <- margin.table(UKSoccer, 1)
away <- margin.table(UKSoccer, 2)
# proportions
prop.table(margin.table(UKSoccer,1))
## Home
## 0 1 2 3 4
## 0.20000000 0.37368421 0.23684211 0.11842105 0.07105263
prop.table(margin.table(UKSoccer,2))
## Away
## 0 1 2 3 4
## 0.36842105 0.35789474 0.14473684 0.10000000 0.02894737
# compare distribution of home & away goals
library(vcd)
goodfit(home, type="poisson")
##
## Observed and fitted values for poisson distribution
## with parameters estimated by `ML'
##
## count observed fitted pearson residual
## 0 76 85.91248 -1.0694349
## 1 142 127.73830 1.2618589
## 2 90 94.96334 -0.5093262
## 3 45 47.06516 -0.3010265
## 4 27 17.49462 0.5432891
goodfit(away, type="poisson")
##
## Observed and fitted values for poisson distribution
## with parameters estimated by `ML'
##
## count observed fitted pearson residual
## 0 140 131.238117 0.7648345
## 1 136 139.526840 -0.2985774
## 2 55 74.169531 -2.2258645
## 3 38 26.284641 2.2850967
## 4 11 6.986181 0.7488822
plot(goodfit(home, type="poisson"))
plot(goodfit(away, type="poisson"))
# make a 5 x 2 table
tab <- rbind(home,away)
names(dimnames(tab)) <- c("Team", "Goals")
chisq.test(tab)
##
## Pearson's Chi-squared test
##
## data: tab
## X-squared = 34.868, df = 4, p-value = 4.945e-07
mosaic(tab, shade=TRUE)
# mean goals scores-- need to use weighted.mean ...
weighted.mean(0:4, w=home)
## [1] 1.486842
weighted.mean(0:4, w=away)
## [1] 1.063158
# ... or else expand frequencies to a data frame of 380 games
Home <- rep(0:4, times=home)
Away <- rep(0:4, times=away)
HA <- data.frame(Home, Away)
summary(HA)
## Home Away
## Min. :0.000 Min. :0.000
## 1st Qu.:1.000 1st Qu.:0.000
## Median :1.000 Median :1.000
## Mean :1.487 Mean :1.063
## 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :4.000 Max. :4.000
boxplot(HA, ylab="Goals scored")