## Loading required package: vcd
## Loading required package: grid
## Loading required package: gnm
Exercise 2.3
data(UCBAdmissions)
UCB <- UCBAdmissions # save typing
ftable(UCB)
## Dept A B C D E F
## Admit Gender
## Admitted Male 512 353 120 138 53 22
## Female 89 17 202 131 94 24
## Rejected Male 313 207 205 279 138 351
## Female 19 8 391 244 299 317
- total number of cases
sum(UCB)
## [1] 4526
- total applicants for each dept
margin.table(UCB, 3)
## Dept
## A B C D E F
## 933 585 918 792 584 714
apply(UCBAdmissions, 3, sum)
## A B C D E F
## 933 585 918 792 584 714
- proportion admitted for each dept
tab1 <- margin.table(UCB , c(1,3))
tab1
## Dept
## Admit A B C D E F
## Admitted 601 370 322 269 147 46
## Rejected 332 215 596 523 437 668
tab1[1,] / margin.table(UCB, 3)
## Dept
## A B C D E F
## 0.64415863 0.63247863 0.35076253 0.33964646 0.25171233 0.06442577
or, using prop.table
prop.table(margin.table(UCB, c(1, 3)),2)
## Dept
## Admit A B C D E F
## Admitted 0.64415863 0.63247863 0.35076253 0.33964646 0.25171233 0.06442577
## Rejected 0.35584137 0.36752137 0.64923747 0.66035354 0.74828767 0.93557423
- proportion in each cell admitted There are different ways to do
this. Perhaps easiest is to extract the subtables of admitted
vs. rejected and divide
admit <- UCB[1,,]
reject <- UCB[2,,]
admit / (admit+reject)
## Dept
## Gender A B C D E F
## Male 0.62060606 0.63035714 0.36923077 0.33093525 0.27748691 0.05898123
## Female 0.82407407 0.68000000 0.34064081 0.34933333 0.23918575 0.07038123
Exercise 3.1: Arbuthnot data
load the data
data(Arbuthnot, package="HistData")
- Plot
Ratio
vs. Year
This uses old-school
base R features: plot()
, abline()
,
loess.smooth()
and lines()
plot(Ratio ~ Year,
data=Arbuthnot,
type="b",
ylim=c(1.0,1.2),
ylab="M/F Ratio")
abline(h=1.0,col="red",lwd=2)
abline(h=mean(Arbuthnot$Ratio), col="blue")
Arb.sm <- loess.smooth(Arbuthnot$Year, Arbuthnot$Ratio)
lines(Arb.sm, col="blue", lwd=2)

using with() evaluates in the environment of a data frame, saves
typing Arbuthnot$…
with(Arbuthnot, {
plot(Ratio ~ Year,
type="b",
ylim=c(1.0,1.2),
ylab="M/F Ratio")
abline(h=1.0, col="red", lwd=2)
abline(h=mean(Ratio), col="blue")
lines(loess.smooth(Year, Ratio), col="blue", lwd=2)
})

(b) Total christenings
with(Arbuthnot, {
Total <- Males + Females
plot(Total ~ Year,
type="b",
ylab="Total Christenings")
Arb.sm <- loess.smooth(Year, Total)
lines(Arb.sm, col="blue", lwd=2)
})

Part (a) using ggplot
library(ggplot2)
Arbuthnot |>
ggplot(aes(x=Year, y=Ratio)) +
geom_point() +
geom_line() +
geom_smooth(method = "loess", formula = y~x, color = "blue", fill="blue") +
geom_hline(yintercept = 1, color = "red", size = 2) +
geom_hline(aes(yintercept = mean(Ratio)), color = "lightblue", size = 1.5) +
ylab("Male / Female Ratio")

Exercise 3.3: WomenQueue
- Barplots
data(WomenQueue, package = "vcd")
barplot(WomenQueue, xlab="Number of Women in Queues of 10 People",
ylab="Number of Queues", col="pink")

- Goodness of fit
WQfit1 <- goodfit(WomenQueue, type="binomial", par=list(size=10))
unlist(WQfit1$par)
## prob size
## 0.435 10.000
summary(WQfit1)
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Likelihood Ratio 8.650999 8 0.3725869
- Plot
plot(WQfit1, type="hanging")

Exercise 3.4: Saxony data
data(Saxony, package = "vcd")
- To test goodness of fit of the binomial with an estimated value,
\(\hat{p}\), only specify max. count,
size
as a parameter
Saxfit <- goodfit(Saxony, type="binomial", par=list(size=12))
unlist(Saxfit$par)
## prob size
## 0.519215 12.000000
The summary() method gives the likelihood ratio test for goodness of
fit. \(X^2 / df = 97 / 11 = 8.81\) is
large (should be \(\approx 1\) for a
good fitting model).
summary(Saxfit)
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Likelihood Ratio 97.0065 11 6.978187e-16
Plot with a hanging rootogram
plot(Saxfit)

- To test the model \(Bin(n=12, p =
1/2)\), you need to also specify the
prob
parameter
Saxfit1 <- goodfit(Saxony, type="binomial", par=list(size=12, prob=0.5))
unlist(Saxfit1$par)
## prob size
## 0.5 12.0
Again, get the GOF test from summary()
To test for
additional lack of fit for the model \(Bin(n=12, p = 1/2)\) compared to the model
\(Bin(n=12, p = \hat{p})\), you have to
subtract manually: \(\Delta \chi^2 = 205 - 97
= 108\) on 1 df.
summary(Saxfit1)
##
## Goodness-of-fit test for binomial distribution
##
## X^2 df P(> X^2)
## Pearson 249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37
Plot the constrained model with a hanging rootogram
plot(Saxfit1)

IycgLS0tDQojJyB0aXRsZTogIkFzc2lnbm1lbnQgMjogU29tZSBzb2x1dGlvbnMiDQojJyBhdXRob3I6ICJNaWNoYWVsIEZyaWVuZGx5Ig0KIycgZGF0ZTogImByIGZvcm1hdChTeXMuRGF0ZSgpKWAiDQojJyBvdXRwdXQ6DQojJyAgIGh0bWxfZG9jdW1lbnQ6DQojJyAgICAgdGhlbWU6IHJlYWRhYmxlDQojJyAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KIycgLS0tDQoNCiMrIGVjaG89RkFMU0UNCmtuaXRyOjpvcHRzX2NodW5rJHNldCgNCiAgd2FybmluZyA9IEZBTFNFLCAgICMgYXZvaWQgd2FybmluZ3MgYW5kIG1lc3NhZ2VzIGluIHRoZSBvdXRwdXQNCiAgbWVzc2FnZSA9IEZBTFNFDQopDQoNCg0KbGlicmFyeSh2Y2RFeHRyYSkNCg0KIycgIyMgRXhlcmNpc2UgMi4zDQoNCmRhdGEoVUNCQWRtaXNzaW9ucykNClVDQiA8LSBVQ0JBZG1pc3Npb25zICAgIyBzYXZlIHR5cGluZw0KZnRhYmxlKFVDQikNCg0KIycgKGEpIHRvdGFsIG51bWJlciBvZiBjYXNlcw0Kc3VtKFVDQikNCg0KIycgKGIpIHRvdGFsIGFwcGxpY2FudHMgZm9yIGVhY2ggZGVwdA0KbWFyZ2luLnRhYmxlKFVDQiwgMykNCmFwcGx5KFVDQkFkbWlzc2lvbnMsIDMsIHN1bSkNCg0KIycgKGMpIHByb3BvcnRpb24gYWRtaXR0ZWQgZm9yIGVhY2ggZGVwdA0KdGFiMSA8LSBtYXJnaW4udGFibGUoVUNCICwgYygxLDMpKQ0KdGFiMQ0KdGFiMVsxLF0gLyBtYXJnaW4udGFibGUoVUNCLCAzKQ0KIycgb3IsIHVzaW5nIHByb3AudGFibGUNCnByb3AudGFibGUobWFyZ2luLnRhYmxlKFVDQiwgYygxLCAzKSksMikNCg0KDQojJyAoZCkgcHJvcG9ydGlvbiBpbiBlYWNoIGNlbGwgYWRtaXR0ZWQNCiMnIFRoZXJlIGFyZSBkaWZmZXJlbnQgd2F5cyB0byBkbyB0aGlzLiBQZXJoYXBzIGVhc2llc3QgaXMgdG8gZXh0cmFjdCB0aGUgc3VidGFibGVzIG9mDQojJyBhZG1pdHRlZCB2cy4gcmVqZWN0ZWQgYW5kIGRpdmlkZQ0KYWRtaXQgPC0gVUNCWzEsLF0NCnJlamVjdCA8LSBVQ0JbMiwsXQ0KDQphZG1pdCAvIChhZG1pdCtyZWplY3QpDQoNCiMnICMjIEV4ZXJjaXNlIDMuMTogQXJidXRobm90IGRhdGENCg0KIycgbG9hZCB0aGUgZGF0YQ0KZGF0YShBcmJ1dGhub3QsIHBhY2thZ2U9Ikhpc3REYXRhIikNCg0KIycgKGEpIFBsb3QgYFJhdGlvYCB2cy4gYFllYXJgDQojJyBUaGlzIHVzZXMgb2xkLXNjaG9vbCBiYXNlIFIgZmVhdHVyZXM6IGBwbG90KClgLCBgYWJsaW5lKClgLCBgbG9lc3Muc21vb3RoKClgIGFuZCBgbGluZXMoKWANCnBsb3QoUmF0aW8gfiBZZWFyLCANCiAgICAgZGF0YT1BcmJ1dGhub3QsIA0KICAgICB0eXBlPSJiIiwgDQogICAgIHlsaW09YygxLjAsMS4yKSwgDQogICAgIHlsYWI9Ik0vRiBSYXRpbyIpDQphYmxpbmUoaD0xLjAsY29sPSJyZWQiLGx3ZD0yKQ0KYWJsaW5lKGg9bWVhbihBcmJ1dGhub3QkUmF0aW8pLCBjb2w9ImJsdWUiKQ0KQXJiLnNtIDwtIGxvZXNzLnNtb290aChBcmJ1dGhub3QkWWVhciwgQXJidXRobm90JFJhdGlvKQ0KbGluZXMoQXJiLnNtLCBjb2w9ImJsdWUiLCBsd2Q9MikNCg0KIycgdXNpbmcgd2l0aCgpIGV2YWx1YXRlcyBpbiB0aGUgZW52aXJvbm1lbnQgb2YgYSBkYXRhIGZyYW1lLCBzYXZlcyB0eXBpbmcgQXJidXRobm90JC4uLg0Kd2l0aChBcmJ1dGhub3QsIHsNCglwbG90KFJhdGlvIH4gWWVhciwgDQoJICAgICB0eXBlPSJiIiwgDQoJICAgICB5bGltPWMoMS4wLDEuMiksIA0KCSAgICAgeWxhYj0iTS9GIFJhdGlvIikNCglhYmxpbmUoaD0xLjAsIGNvbD0icmVkIiwgbHdkPTIpDQoJYWJsaW5lKGg9bWVhbihSYXRpbyksIGNvbD0iYmx1ZSIpDQoJbGluZXMobG9lc3Muc21vb3RoKFllYXIsIFJhdGlvKSwgY29sPSJibHVlIiwgbHdkPTIpDQoJfSkNCg0KIycgIyMjIChiKSBUb3RhbCBjaHJpc3RlbmluZ3MNCndpdGgoQXJidXRobm90LCB7DQoJVG90YWwgPC0gTWFsZXMgKyBGZW1hbGVzDQoJcGxvdChUb3RhbCB+IFllYXIsIA0KCSAgICAgdHlwZT0iYiIsIA0KCSAgICAgeWxhYj0iVG90YWwgQ2hyaXN0ZW5pbmdzIikNCglBcmIuc20gPC0gbG9lc3Muc21vb3RoKFllYXIsIFRvdGFsKQ0KCWxpbmVzKEFyYi5zbSwgY29sPSJibHVlIiwgbHdkPTIpDQoJfSkNCg0KIycgIyMjIFBhcnQgKGEpIHVzaW5nIGBnZ3Bsb3RgDQpsaWJyYXJ5KGdncGxvdDIpDQpBcmJ1dGhub3QgfD4NCiAgZ2dwbG90KGFlcyh4PVllYXIsIHk9UmF0aW8pKSArDQogICAgZ2VvbV9wb2ludCgpICsNCiAgICBnZW9tX2xpbmUoKSArDQogICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxvZXNzIiwgZm9ybXVsYSA9IHl+eCwgY29sb3IgPSAiYmx1ZSIsIGZpbGw9ImJsdWUiKSArDQogICAgZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMSwgY29sb3IgPSAicmVkIiwgc2l6ZSA9IDIpICsNCiAgICBnZW9tX2hsaW5lKGFlcyh5aW50ZXJjZXB0ID0gbWVhbihSYXRpbykpLCBjb2xvciA9ICJsaWdodGJsdWUiLCBzaXplID0gMS41KSArDQogICAgeWxhYigiTWFsZSAvIEZlbWFsZSBSYXRpbyIpDQogIA0KDQojJyAjIyBFeGVyY2lzZSAzLjM6IFdvbWVuUXVldWUNCg0KIycgKGEpIEJhcnBsb3RzDQpkYXRhKFdvbWVuUXVldWUsIHBhY2thZ2UgPSAidmNkIikNCmJhcnBsb3QoV29tZW5RdWV1ZSwgeGxhYj0iTnVtYmVyIG9mIFdvbWVuIGluIFF1ZXVlcyBvZiAxMCBQZW9wbGUiLA0KCXlsYWI9Ik51bWJlciBvZiBRdWV1ZXMiLCBjb2w9InBpbmsiKQ0KDQojJyAoYikgR29vZG5lc3Mgb2YgZml0DQpXUWZpdDEgPC0gZ29vZGZpdChXb21lblF1ZXVlLCB0eXBlPSJiaW5vbWlhbCIsIHBhcj1saXN0KHNpemU9MTApKQ0KdW5saXN0KFdRZml0MSRwYXIpDQpzdW1tYXJ5KFdRZml0MSkNCg0KIycgKGMpIFBsb3QNCnBsb3QoV1FmaXQxLCB0eXBlPSJoYW5naW5nIikNCg0KIycgIyMgRXhlcmNpc2UgMy40OiBTYXhvbnkgZGF0YQ0KZGF0YShTYXhvbnksIHBhY2thZ2UgPSAidmNkIikNCg0KIycgKGEpIFRvIHRlc3QgZ29vZG5lc3Mgb2YgZml0IG9mIHRoZSBiaW5vbWlhbCB3aXRoIGFuIGVzdGltYXRlZCB2YWx1ZSwgJFxoYXR7cH0kLCBvbmx5IHNwZWNpZnkgbWF4LiBjb3VudCwgYHNpemVgIGFzIGEgcGFyYW1ldGVyDQpTYXhmaXQgPC0gZ29vZGZpdChTYXhvbnksIHR5cGU9ImJpbm9taWFsIiwgcGFyPWxpc3Qoc2l6ZT0xMikpDQp1bmxpc3QoU2F4Zml0JHBhcikNCiMnIFRoZSBzdW1tYXJ5KCkgbWV0aG9kIGdpdmVzIHRoZSBsaWtlbGlob29kIHJhdGlvIHRlc3QgZm9yIGdvb2RuZXNzIG9mIGZpdC4NCiMnICRYXjIgLyBkZiA9IDk3IC8gMTEgPSA4LjgxJCBpcyBsYXJnZSAoc2hvdWxkIGJlICRcYXBwcm94IDEkIGZvciBhIGdvb2QgZml0dGluZyBtb2RlbCkuDQpzdW1tYXJ5KFNheGZpdCkNCg0KIycgUGxvdCB3aXRoIGEgaGFuZ2luZyByb290b2dyYW0NCnBsb3QoU2F4Zml0KQ0KDQoNCiMnIChiKSBUbyB0ZXN0IHRoZSBtb2RlbCAkQmluKG49MTIsIHAgPSAxLzIpJCwgeW91IG5lZWQgdG8gYWxzbyBzcGVjaWZ5IHRoZSBgcHJvYmAgcGFyYW1ldGVyDQpTYXhmaXQxIDwtIGdvb2RmaXQoU2F4b255LCB0eXBlPSJiaW5vbWlhbCIsIHBhcj1saXN0KHNpemU9MTIsIHByb2I9MC41KSkNCnVubGlzdChTYXhmaXQxJHBhcikNCg0KIycgQWdhaW4sIGdldCB0aGUgR09GIHRlc3QgZnJvbSBgc3VtbWFyeSgpYA0KIycgVG8gdGVzdCBmb3IgYWRkaXRpb25hbCBsYWNrIG9mIGZpdCBmb3IgdGhlIG1vZGVsICRCaW4obj0xMiwgcCA9IDEvMikkIGNvbXBhcmVkIHRvIHRoZSBtb2RlbA0KIycgJEJpbihuPTEyLCBwID0gXGhhdHtwfSkkLCB5b3UgaGF2ZSB0byBzdWJ0cmFjdCBtYW51YWxseTogJFxEZWx0YSBcY2hpXjIgPSAyMDUgLSA5NyA9IDEwOCQgb24gMSBkZi4NCnN1bW1hcnkoU2F4Zml0MSkNCg0KIycgUGxvdCB0aGUgY29uc3RyYWluZWQgbW9kZWwgd2l0aCBhIGhhbmdpbmcgcm9vdG9ncmFtDQpwbG90KFNheGZpdDEpDQoNCg0KDQoNCg==