This script reproduces all of the analysis and graphs for the MANOVA of the SocialCog
data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit()
. This is most convenient within R Studio via the File -> Compile Notebook
option.
library(heplots)
library(candisc)
library(mvinfluence)
data(SocialCog, package="heplots")
str(SocialCog)
## 'data.frame': 139 obs. of 5 variables:
## $ Dx : Factor w/ 3 levels "Schizophrenia",..: 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "contrasts")= num [1:3, 1:2] -0.5 -0.5 1 1 -1 0
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr "Schizophrenia" "Schizoaffective" "Control"
## .. .. ..$ : NULL
## $ MgeEmotions: int 28 37 24 28 28 28 44 39 24 32 ...
## $ ToM : int 10 12 19 17 17 13 19 20 26 18 ...
## $ ExtBias : int 1 -33 -1 -6 8 2 -2 -1 5 -2 ...
## $ PersBias : num 0.312 0.583 1 0.333 0.909 ...
## - attr(*, "na.action")=Class 'omit' Named int [1:124] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "names")= chr [1:124] "1" "2" "3" "4" ...
SC.mlm <- lm(cbind(MgeEmotions,ToM, ExtBias, PersBias) ~ Dx,
data=SocialCog)
Anova(SC.mlm)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Dx 2 0.212 3.97 8 268 0.00018 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Test contrasts of substantive interest: Dx1
: Control group vs. schizo groups; Dx2
: Schizophrenia group vs. schizoaffective.
print(linearHypothesis(SC.mlm, "Dx1"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.1355 5.212 4 133 0.000624 ***
## Wilks 1 0.8645 5.212 4 133 0.000624 ***
## Hotelling-Lawley 1 0.1568 5.212 4 133 0.000624 ***
## Roy 1 0.1568 5.212 4 133 0.000624 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(linearHypothesis(SC.mlm, "Dx2"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0697 2.493 4 133 0.0461 *
## Wilks 1 0.9303 2.493 4 133 0.0461 *
## Hotelling-Lawley 1 0.0750 2.493 4 133 0.0461 *
## Roy 1 0.0750 2.493 4 133 0.0461 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This plot shows the first two response variables. We can also embed the H ellipses for the contrasts to see how they relate to differences in the group means.
heplot(SC.mlm, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
fill=TRUE, fill.alpha=.1,
cex.lab=1.5, cex=1.2)
The pairs()
plot shows all pairwise views
pairs(SC.mlm, fill=c(TRUE,FALSE), fill.alpha=.1,
hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"))
How do the groups differ in the canonical space accounting for the greatest group separation?
SC.can <- candisc(SC.mlm)
SC.can
##
## Canonical Discriminant Analysis for Dx:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.1756 0.2129 0.175 84.9 84.9
## 2 0.0365 0.0379 0.175 15.1 100.0
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F num Df den Df Pr(> F)
## 1 0.794 8.24 4 270 2.8e-06 ***
## 2 0.963 5.15 1 136 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The plot shows the canonical scores with data ellipses and variable vectors indicating how they relate to the canonical dimensions. There is a lot of overlap, but the groups are clearly separated along both canonical dimensions.
col <- c("red", "darkgreen", "blue")
plot(SC.can, ellipse=TRUE, pch=c(7,9,10),
var.cex=1.2, cex.lab=1.5, var.lwd=2, col=col,
var.col="black", # var.pos=pos,
prefix="Canonical dimension ")
An equivalent plot, as an HE plot. We can also project the contrasts among groups into this space.
heplot(SC.can, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
fill=TRUE, fill.alpha=.1,
var.lwd=2, var.col="black",
label.pos=c(3,1),
cex=1.25, cex.lab=1.2,
prefix="Canonical dimension ")
## Vector scale factor set to 2.777
cqplot(SC.mlm, id.n=1, main="", cex.lab=1.25)
Robust version, using MVE shows 4 possible outliers!
cqplot(SC.mlm, method="mve", id.n=4, main="", cex.lab=1.25)
One extreme outlier spoils the pie! The influence plot shows how it stands out.
influencePlot(SC.mlm, scale=20, fill.alpha.max=.4, cex.lab=1.5, id.cex=1.5)
## H Q CookD L R
## 15 0.02326 0.39854 0.42016 0.02381 0.40803
## 80 0.03333 0.02328 0.03518 0.03448 0.02408
op <- par(mar=c(5,4,1,1)+.1)
influencePlot(SC.mlm, type="LR", scale=20, fill.alpha.max=.4, id.cex=1.5)
## H Q CookD L R
## 15 0.02326 0.39854 0.42016 0.02381 0.40803
## 80 0.03333 0.02328 0.03518 0.03448 0.02408
par(op)
#dev.copy2pdf(file="SC-LRplot.pdf")
SC.mlm1 <- update(SC.mlm, subset=rownames(SocialCog)!="15")
SC.mlm1
##
## Call:
## lm(formula = cbind(MgeEmotions, ToM, ExtBias, PersBias) ~ Dx,
## data = SocialCog, subset = rownames(SocialCog) != "15")
##
## Coefficients:
## MgeEmotions ToM ExtBias PersBias
## (Intercept) 41.7961 22.9584 2.0211 0.6556
## Dx1 3.2494 2.0264 0.7517 -0.0737
## Dx2 -4.3619 -0.9214 -0.4548 -0.0149
Anova(SC.mlm1)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Dx 2 0.201 3.71 8 266 0.00039 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(linearHypothesis(SC.mlm1, "Dx1"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.1328 5.052 4 132 0.000807 ***
## Wilks 1 0.8672 5.052 4 132 0.000807 ***
## Hotelling-Lawley 1 0.1531 5.052 4 132 0.000807 ***
## Roy 1 0.1531 5.052 4 132 0.000807 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(linearHypothesis(SC.mlm1, "Dx2"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0621 2.183 4 132 0.0743 .
## Wilks 1 0.9379 2.183 4 132 0.0743 .
## Hotelling-Lawley 1 0.0662 2.183 4 132 0.0743 .
## Roy 1 0.0662 2.183 4 132 0.0743 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
heplot(SC.mlm1, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
fill=TRUE, fill.alpha=.1,
cex.lab=1.5, cex=1.2)
pairs(SC.mlm1, fill=c(TRUE,FALSE), fill.alpha=.1)
SC.can1 <- candisc(SC.mlm1)
SC.can1
##
## Canonical Discriminant Analysis for Dx:
##
## CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.1645 0.1969 0.159 83.9 83.9
## 2 0.0364 0.0378 0.159 16.1 100.0
##
## Test of H0: The canonical correlations in the
## current row and all that follow are zero
##
## LR test stat approx F num Df den Df Pr(> F)
## 1 0.805 7.67 4 268 7.3e-06 ***
## 2 0.964 5.10 1 135 0.026 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
the plot shows the canonical scores with data ellipses and variable vectors indicating how they relate to the canonical dimensions.
plot(SC.can1, ellipse=TRUE)
equivalent plot, as an HE plot
heplot(SC.can1,
fill=TRUE, fill.alpha=.1,
hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
lwd = c(1, 2, 3, 3),
col=c("red", "blue", "darkgreen", "darkgreen"),
var.lwd=2, var.col="black",
label.pos=c(3,1), var.cex=1.2,
cex=1.25, cex.lab=1.2, scale=2.8,
prefix="Canonical dimension ")
As an alternative to selective screening to omit possibly influential cases, we can use an iterative robust method that downweights cases based on the squared Mahalanobis distances of the residuals.
SC.rlm <- robmlm(cbind( MgeEmotions, ToM, ExtBias, PersBias) ~ Dx,
data=SocialCog,
subset=rownames(SocialCog)!="15")
# apply this to all the observations
SC.rlm <- robmlm(cbind( MgeEmotions, ToM, ExtBias, PersBias) ~ Dx,
data=SocialCog
# subset=rownames(SocialCog)!="15"
)
Anova(SC.rlm)
##
## Type II MANOVA Tests: Pillai test statistic
## Df test stat approx F num Df den Df Pr(>F)
## Dx 2 0.25 4.75 8 266 1.8e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(linearHypothesis(SC.rlm, "Dx1"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.1765 7.074 4 132 3.43e-05 ***
## Wilks 1 0.8235 7.074 4 132 3.43e-05 ***
## Hotelling-Lawley 1 0.2144 7.074 4 132 3.43e-05 ***
## Roy 1 0.2144 7.074 4 132 3.43e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(linearHypothesis(SC.rlm, "Dx2"), SSP=FALSE)
##
## Multivariate Tests:
## Df test stat approx F num Df den Df Pr(>F)
## Pillai 1 0.0689 2.442 4 132 0.0499 *
## Wilks 1 0.9311 2.442 4 132 0.0499 *
## Hotelling-Lawley 1 0.0740 2.442 4 132 0.0499 *
## Roy 1 0.0740 2.442 4 132 0.0499 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairs(SC.rlm, fill=c(TRUE,FALSE), fill.alpha=.1)
NB: There is now a plot.robmlm()
function in heplots
that does most of this
Dx <- SocialCog$Dx
bad <- SC.rlm$weights < .6 # points to label
col <- c("red", "darkgreen", "blue") # colors for groups
plot(SC.rlm$weights,
xlab="Case index",
ylab="Weight in robust MANOVA",
pch=(15:17)[Dx],
col=col[Dx], ylim=c(0,1),
cex = ifelse(bad, 1.4, .9),
cex.lab = 1.25)
breaks <-cumsum(table(SocialCog$Dx))
ctr <- c(0,breaks[1:2]) + diff(c(0, breaks))/2
abline(v=breaks[1:2]+.5, col="grey")
text(ctr, 0.05+c(0, -.04, 0), levels(Dx), cex=1.2)
# label "outliers"
text((1:nrow(SocialCog))[bad], SC.rlm$weights[bad],
labels=rownames(SocialCog)[bad], pos=c(2,4,4))