Here are some simple examples of working with binomial distributions.
The motivating example is vcd::WeldonDice
, reporting the
frequencies of obtaining a 5 or 6 in throws of 12 dice. If the dice were
fair, this should give a binomial distribution, Bin(12, 1/3).
library(vcd)
## Loading required package: grid
library(lattice)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
data(WeldonDice, package = "vcd")
WeldonDice
## n56
## 0 1 2 3 4 5 6 7 8 9 10
## 185 1149 3265 5475 6114 5194 3067 1331 403 105 18
The dbinom()
function calculates the probabilities in a
binomial, Bin(n, p) for a range of x
values.
x <- seq(0,12)
plot(
x = x,
y = dbinom(x, 12, 1 / 3),
type = "h",
lwd = 8
)
lines()
x <- seq(0,12)
plot(
x = x,
y = dbinom(x, 12, 1 / 3),
type = "h",
xlab = "Number of successes",
ylab = "Probability",
lwd = 8,
lend = "square"
)
lines(x=x, y=dbinom(x,12,1/3))
There’s a wrinkle here, in that the data reported in
WeldonDice
for x=10
represents the frequency
for 10 or more 5s or 6s.
data(WeldonDice, package="vcd")
Weldon.df <- as.data.frame(WeldonDice) # convert to data frame
x <- seq(0,12)
Prob <- dbinom(x, 12, 1/3) # binomial probabilities
Prob <- c(Prob[1:10], sum(Prob[11:13])) # sum values for 10+
Exp <- round(sum(WeldonDice)*Prob) # expected frequencies
Diff <- Weldon.df[,"Freq"] - Exp # raw residuals
Chisq = Diff^2 /Exp
data.frame(Weldon.df, Prob=round(Prob,4), Exp, Diff, Chisq)
## n56 Freq Prob Exp Diff Chisq
## 1 0 185 0.0077 203 -18 1.5960591
## 2 1 1149 0.0462 1216 -67 3.6916118
## 3 2 3265 0.1272 3345 -80 1.9133034
## 4 3 5475 0.2120 5576 -101 1.8294476
## 5 4 6114 0.2384 6273 -159 4.0301291
## 6 5 5194 0.1908 5018 176 6.1729773
## 7 6 3067 0.1113 2927 140 6.6962761
## 8 7 1331 0.0477 1255 76 4.6023904
## 9 8 403 0.0149 392 11 0.3086735
## 10 9 105 0.0033 87 18 3.7241379
## 11 10 18 0.0005 14 4 1.1428571
vcd::goodfit()
vcd::goodfit()
calculates probabilities, fitted values
and residuals for a given distribution type. The plot()
method for a goodfit
object gives a
rootogram()
.
goodfit(WeldonDice, type = "binomial", par = list(size=12)) |> plot()
# `dbinom()` can take vector inputs for the parameters
x <- seq(0,12) # values of x
p <- c(1/6, 1/3, 1/2, 2/3) # values of p
x <- rep(x, 4) # replicate, length(p) times
p <- rep(p, each=13) # replicate, each length(x)
bin.df <- data.frame(x, prob = dbinom(x, 12, p), p)
bin.df$p <- factor(bin.df$p, labels=c("1/6", "1/3", "1/2", "2/3"))
str(bin.df)
## 'data.frame': 52 obs. of 3 variables:
## $ x : int 0 1 2 3 4 5 6 7 8 9 ...
## $ prob: num 0.1122 0.2692 0.2961 0.1974 0.0888 ...
## $ p : Factor w/ 4 levels "1/6","1/3","1/2",..: 1 1 1 1 1 1 1 1 1 1 ...
expand.grid()
XP <-expand.grid(x=0:12, p=c(1/6, 1/3, 1/2, 2/3))
bin.df <- data.frame(XP, prob=dbinom(XP[,"x"], 12, XP[,"p"]))
bin.df$p <- factor(bin.df$p, labels=c("1/6", "1/3", "1/2", "2/3"))
str(bin.df)
## 'data.frame': 52 obs. of 3 variables:
## $ x : int 0 1 2 3 4 5 6 7 8 9 ...
## $ p : Factor w/ 4 levels "1/6","1/3","1/2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ prob: num 0.1122 0.2692 0.2961 0.1974 0.0888 ...
lattice::xyplot()
to plot the distributionsWith lattice, the groups = p
argument gives a separate
line for each level of p
. pch
and
col
arguments set the point symbols and colors. The
key
argument is used to define the legend for the plot.
library(lattice)
mycol <- palette()[2:5]
xyplot( prob ~ x, data=bin.df, groups=p,
xlab=list('Number of successes', cex=1.25),
ylab=list('Probability', cex=1.25),
type='b', pch=15:17, lwd=2, cex=1.25, col=mycol,
key = list(
title = 'Pr(success)',
points = list(pch=15:17, col=mycol, cex=1.25),
lines = list(lwd=2, col=mycol),
text = list(levels(bin.df$p)),
x=0.9, y=0.98, corner=c(x=1, y=1)
)
)
ggplot2
For this plot, to give separate curves for each level of
p
, I assign this to both the color
&
shape
aesthetics.
library(ggplot2)
ggplot(bin.df, aes(x = x, y = prob, color = p, shape = p)) +
geom_point(size=4) +
geom_line(linewidth = 1.25) +
labs(x = "Number of successes",
y = "Probability") +
scale_colour_discrete("Pr(success)") +
scale_shape_discrete("Pr(success)") +
theme_bw(base_size = 16) +
theme(legend.position = c(0.8, 0.8))