Here are some simple examples of working with binomial distributions.
The motivating example is vcd::WeldonDice, reporting the
frequencies of obtaining a 5 or 6 in throws of 12 dice. If the dice were
fair, this should give a binomial distribution, Bin(12, 1/3).
library(vcd)
## Loading required package: grid
library(lattice)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
data(WeldonDice, package = "vcd")
WeldonDice
## n56
## 0 1 2 3 4 5 6 7 8 9 10
## 185 1149 3265 5475 6114 5194 3067 1331 403 105 18
The dbinom() function calculates the probabilities in a
binomial, Bin(n, p) for a range of x values.
x <- seq(0,12)
plot(
x = x,
y = dbinom(x, 12, 1 / 3),
type = "h",
lwd = 8
)
lines()x <- seq(0,12)
plot(
x = x,
y = dbinom(x, 12, 1 / 3),
type = "h",
xlab = "Number of successes",
ylab = "Probability",
lwd = 8,
lend = "square"
)
lines(x=x, y=dbinom(x,12,1/3))
There’s a wrinkle here, in that the data reported in
WeldonDice for x=10 represents the frequency
for 10 or more 5s or 6s.
data(WeldonDice, package="vcd")
Weldon.df <- as.data.frame(WeldonDice) # convert to data frame
x <- seq(0,12)
Prob <- dbinom(x, 12, 1/3) # binomial probabilities
Prob <- c(Prob[1:10], sum(Prob[11:13])) # sum values for 10+
Exp <- round(sum(WeldonDice)*Prob) # expected frequencies
Diff <- Weldon.df[,"Freq"] - Exp # raw residuals
Chisq = Diff^2 /Exp
data.frame(Weldon.df, Prob=round(Prob,4), Exp, Diff, Chisq)
## n56 Freq Prob Exp Diff Chisq
## 1 0 185 0.0077 203 -18 1.5960591
## 2 1 1149 0.0462 1216 -67 3.6916118
## 3 2 3265 0.1272 3345 -80 1.9133034
## 4 3 5475 0.2120 5576 -101 1.8294476
## 5 4 6114 0.2384 6273 -159 4.0301291
## 6 5 5194 0.1908 5018 176 6.1729773
## 7 6 3067 0.1113 2927 140 6.6962761
## 8 7 1331 0.0477 1255 76 4.6023904
## 9 8 403 0.0149 392 11 0.3086735
## 10 9 105 0.0033 87 18 3.7241379
## 11 10 18 0.0005 14 4 1.1428571
vcd::goodfit()vcd::goodfit() calculates probabilities, fitted values
and residuals for a given distribution type. The plot()
method for a goodfit object gives a
rootogram().
goodfit(WeldonDice, type = "binomial", par = list(size=12)) |> plot()
# `dbinom()` can take vector inputs for the parameters
x <- seq(0,12) # values of x
p <- c(1/6, 1/3, 1/2, 2/3) # values of p
x <- rep(x, 4) # replicate, length(p) times
p <- rep(p, each=13) # replicate, each length(x)
bin.df <- data.frame(x, prob = dbinom(x, 12, p), p)
bin.df$p <- factor(bin.df$p, labels=c("1/6", "1/3", "1/2", "2/3"))
str(bin.df)
## 'data.frame': 52 obs. of 3 variables:
## $ x : int 0 1 2 3 4 5 6 7 8 9 ...
## $ prob: num 0.1122 0.2692 0.2961 0.1974 0.0888 ...
## $ p : Factor w/ 4 levels "1/6","1/3","1/2",..: 1 1 1 1 1 1 1 1 1 1 ...
expand.grid()XP <-expand.grid(x=0:12, p=c(1/6, 1/3, 1/2, 2/3))
bin.df <- data.frame(XP, prob=dbinom(XP[,"x"], 12, XP[,"p"]))
bin.df$p <- factor(bin.df$p, labels=c("1/6", "1/3", "1/2", "2/3"))
str(bin.df)
## 'data.frame': 52 obs. of 3 variables:
## $ x : int 0 1 2 3 4 5 6 7 8 9 ...
## $ p : Factor w/ 4 levels "1/6","1/3","1/2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ prob: num 0.1122 0.2692 0.2961 0.1974 0.0888 ...
lattice::xyplot() to plot the distributionsWith lattice, the groups = p argument gives a separate
line for each level of p. pch and
col arguments set the point symbols and colors. The
key argument is used to define the legend for the plot.
library(lattice)
mycol <- palette()[2:5]
xyplot( prob ~ x, data=bin.df, groups=p,
xlab=list('Number of successes', cex=1.25),
ylab=list('Probability', cex=1.25),
type='b', pch=15:17, lwd=2, cex=1.25, col=mycol,
key = list(
title = 'Pr(success)',
points = list(pch=15:17, col=mycol, cex=1.25),
lines = list(lwd=2, col=mycol),
text = list(levels(bin.df$p)),
x=0.9, y=0.98, corner=c(x=1, y=1)
)
)
ggplot2For this plot, to give separate curves for each level of
p, I assign this to both the color &
shape aesthetics.
library(ggplot2)
ggplot(bin.df, aes(x = x, y = prob, color = p, shape = p)) +
geom_point(size=4) +
geom_line(linewidth = 1.25) +
labs(x = "Number of successes",
y = "Probability") +
scale_colour_discrete("Pr(success)") +
scale_shape_discrete("Pr(success)") +
theme_bw(base_size = 16) +
theme(legend.position = c(0.8, 0.8))