Analysis of Social Cognition data

This script reproduces all of the analysis and graphs for the MANOVA of the SocialCog data in the paper and also includes other analyses not described there. It is set up as an R script that can be “compiled” to HTML, Word, or PDF using knitr::knit(). This is most convenient within R Studio via the File -> Compile Notebook option.

Load packages and the data

library(heplots)
library(candisc)
library(mvinfluence)
data(SocialCog, package="heplots")
str(SocialCog)

## 'data.frame':    139 obs. of  5 variables:
##  $ Dx         : Factor w/ 3 levels "Schizophrenia",..: 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "contrasts")= num [1:3, 1:2] -0.5 -0.5 1 1 -1 0
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr  "Schizophrenia" "Schizoaffective" "Control"
##   .. .. ..$ : NULL
##  $ MgeEmotions: int  28 37 24 28 28 28 44 39 24 32 ...
##  $ ToM        : int  10 12 19 17 17 13 19 20 26 18 ...
##  $ ExtBias    : int  1 -33 -1 -6 8 2 -2 -1 5 -2 ...
##  $ PersBias   : num  0.312 0.583 1 0.333 0.909 ...
##  - attr(*, "na.action")=Class 'omit'  Named int [1:124] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..- attr(*, "names")= chr [1:124] "1" "2" "3" "4" ...

Fit the MANOVA model, test hypotheses

SC.mlm <-  lm(cbind(MgeEmotions,ToM, ExtBias, PersBias) ~ Dx,
               data=SocialCog)

Anova(SC.mlm)

## 
## Type II MANOVA Tests: Pillai test statistic
##    Df test stat approx F num Df den Df  Pr(>F)    
## Dx  2     0.212     3.97      8    268 0.00018 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test contrasts of substantive interest: Dx1: Control group vs. schizo groups; Dx2: Schizophrenia group vs. schizoaffective.

print(linearHypothesis(SC.mlm, "Dx1"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df   Pr(>F)    
## Pillai            1    0.1355    5.212      4    133 0.000624 ***
## Wilks             1    0.8645    5.212      4    133 0.000624 ***
## Hotelling-Lawley  1    0.1568    5.212      4    133 0.000624 ***
## Roy               1    0.1568    5.212      4    133 0.000624 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(linearHypothesis(SC.mlm, "Dx2"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df Pr(>F)  
## Pillai            1    0.0697    2.493      4    133 0.0461 *
## Wilks             1    0.9303    2.493      4    133 0.0461 *
## Hotelling-Lawley  1    0.0750    2.493      4    133 0.0461 *
## Roy               1    0.0750    2.493      4    133 0.0461 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HE plots

This plot shows the first two response variables. We can also embed the H ellipses for the contrasts to see how they relate to differences in the group means.

heplot(SC.mlm, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
    fill=TRUE, fill.alpha=.1,
    cex.lab=1.5, cex=1.2)

The pairs() plot shows all pairwise views

pairs(SC.mlm, fill=c(TRUE,FALSE), fill.alpha=.1,
    hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"))

Canonical discriminant analysis

How do the groups differ in the canonical space accounting for the greatest group separation?

SC.can <- candisc(SC.mlm)
SC.can

## 
## Canonical Discriminant Analysis for Dx:
## 
##   CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.1756     0.2129      0.175    84.9       84.9
## 2 0.0365     0.0379      0.175    15.1      100.0
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F num Df den Df Pr(> F)    
## 1        0.794     8.24      4    270 2.8e-06 ***
## 2        0.963     5.15      1    136   0.025 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The plot shows the canonical scores with data ellipses and variable vectors indicating how they relate to the canonical dimensions. There is a lot of overlap, but the groups are clearly separated along both canonical dimensions.

col <- c("red", "darkgreen", "blue")
plot(SC.can, ellipse=TRUE, pch=c(7,9,10),
    var.cex=1.2, cex.lab=1.5, var.lwd=2,  col=col,
    var.col="black", # var.pos=pos,
    prefix="Canonical dimension ")

An equivalent plot, as an HE plot. We can also project the contrasts among groups into this space.

heplot(SC.can, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
    fill=TRUE, fill.alpha=.1, 
    var.lwd=2, var.col="black", 
    label.pos=c(3,1), 
    cex=1.25, cex.lab=1.2,
    prefix="Canonical dimension ")

## Vector scale factor set to  2.777

Examine model residuals in a chi-square QQ plot

cqplot(SC.mlm, id.n=1, main="", cex.lab=1.25)

Robust version, using MVE shows 4 possible outliers!

cqplot(SC.mlm, method="mve", id.n=4, main="", cex.lab=1.25)

One extreme outlier spoils the pie! The influence plot shows how it stands out.

influencePlot(SC.mlm, scale=20, fill.alpha.max=.4, cex.lab=1.5, id.cex=1.5)

##          H       Q   CookD       L       R
## 15 0.02326 0.39854 0.42016 0.02381 0.40803
## 80 0.03333 0.02328 0.03518 0.03448 0.02408

op <- par(mar=c(5,4,1,1)+.1)
influencePlot(SC.mlm, type="LR", scale=20, fill.alpha.max=.4, id.cex=1.5)

##          H       Q   CookD       L       R
## 15 0.02326 0.39854 0.42016 0.02381 0.40803
## 80 0.03333 0.02328 0.03518 0.03448 0.02408

par(op)
#dev.copy2pdf(file="SC-LRplot.pdf")

Refit the model, deleting that case

SC.mlm1 <- update(SC.mlm, subset=rownames(SocialCog)!="15")
SC.mlm1

## 
## Call:
## lm(formula = cbind(MgeEmotions, ToM, ExtBias, PersBias) ~ Dx, 
##     data = SocialCog, subset = rownames(SocialCog) != "15")
## 
## Coefficients:
##              MgeEmotions  ToM      ExtBias  PersBias
## (Intercept)  41.7961      22.9584   2.0211   0.6556 
## Dx1           3.2494       2.0264   0.7517  -0.0737 
## Dx2          -4.3619      -0.9214  -0.4548  -0.0149

Anova(SC.mlm1)

## 
## Type II MANOVA Tests: Pillai test statistic
##    Df test stat approx F num Df den Df  Pr(>F)    
## Dx  2     0.201     3.71      8    266 0.00039 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(linearHypothesis(SC.mlm1, "Dx1"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df   Pr(>F)    
## Pillai            1    0.1328    5.052      4    132 0.000807 ***
## Wilks             1    0.8672    5.052      4    132 0.000807 ***
## Hotelling-Lawley  1    0.1531    5.052      4    132 0.000807 ***
## Roy               1    0.1531    5.052      4    132 0.000807 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(linearHypothesis(SC.mlm1, "Dx2"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df Pr(>F)  
## Pillai            1    0.0621    2.183      4    132 0.0743 .
## Wilks             1    0.9379    2.183      4    132 0.0743 .
## Hotelling-Lawley  1    0.0662    2.183      4    132 0.0743 .
## Roy               1    0.0662    2.183      4    132 0.0743 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HE plots of revised model

heplot(SC.mlm1, hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
    fill=TRUE, fill.alpha=.1,
    cex.lab=1.5, cex=1.2)

pairs(SC.mlm1, fill=c(TRUE,FALSE), fill.alpha=.1)

Canonical discriminant analysis

SC.can1 <- candisc(SC.mlm1)
SC.can1

## 
## Canonical Discriminant Analysis for Dx:
## 
##   CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.1645     0.1969      0.159    83.9       83.9
## 2 0.0364     0.0378      0.159    16.1      100.0
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F num Df den Df Pr(> F)    
## 1        0.805     7.67      4    268 7.3e-06 ***
## 2        0.964     5.10      1    135   0.026 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

the plot shows the canonical scores with data ellipses and variable vectors indicating how they relate to the canonical dimensions.

plot(SC.can1, ellipse=TRUE)

equivalent plot, as an HE plot

heplot(SC.can1, 
    fill=TRUE, fill.alpha=.1,
    hypotheses=list("Dx1"="Dx1", "Dx2"="Dx2"),
    lwd = c(1, 2, 3, 3),
    col=c("red", "blue", "darkgreen", "darkgreen"),
    var.lwd=2, var.col="black", 
    label.pos=c(3,1), var.cex=1.2, 
    cex=1.25, cex.lab=1.2, scale=2.8,
    prefix="Canonical dimension ")

Robust MLM

As an alternative to selective screening to omit possibly influential cases, we can use an iterative robust method that downweights cases based on the squared Mahalanobis distances of the residuals.

SC.rlm <- robmlm(cbind( MgeEmotions, ToM, ExtBias, PersBias) ~ Dx,
               data=SocialCog,
               subset=rownames(SocialCog)!="15")

# apply this to all the observations
SC.rlm <- robmlm(cbind( MgeEmotions, ToM, ExtBias, PersBias) ~ Dx,
               data=SocialCog
#               subset=rownames(SocialCog)!="15"
               )

Anova(SC.rlm)

## 
## Type II MANOVA Tests: Pillai test statistic
##    Df test stat approx F num Df den Df  Pr(>F)    
## Dx  2      0.25     4.75      8    266 1.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(linearHypothesis(SC.rlm, "Dx1"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df   Pr(>F)    
## Pillai            1    0.1765    7.074      4    132 3.43e-05 ***
## Wilks             1    0.8235    7.074      4    132 3.43e-05 ***
## Hotelling-Lawley  1    0.2144    7.074      4    132 3.43e-05 ***
## Roy               1    0.2144    7.074      4    132 3.43e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

print(linearHypothesis(SC.rlm, "Dx2"), SSP=FALSE)

## 
## Multivariate Tests: 
##                  Df test stat approx F num Df den Df Pr(>F)  
## Pillai            1    0.0689    2.442      4    132 0.0499 *
## Wilks             1    0.9311    2.442      4    132 0.0499 *
## Hotelling-Lawley  1    0.0740    2.442      4    132 0.0499 *
## Roy               1    0.0740    2.442      4    132 0.0499 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pairs(SC.rlm, fill=c(TRUE,FALSE), fill.alpha=.1)

Plot the weights from the robust MLM

NB: There is now a plot.robmlm() function in heplots that does most of this

Dx <- SocialCog$Dx
bad <- SC.rlm$weights < .6              # points to label
col <- c("red", "darkgreen", "blue")    # colors for groups

plot(SC.rlm$weights, 
    xlab="Case index", 
    ylab="Weight in robust MANOVA", 
    pch=(15:17)[Dx],
    col=col[Dx], ylim=c(0,1), 
    cex = ifelse(bad, 1.4, .9),
    cex.lab = 1.25)

breaks <-cumsum(table(SocialCog$Dx))
ctr <- c(0,breaks[1:2]) + diff(c(0, breaks))/2
abline(v=breaks[1:2]+.5, col="grey")
text(ctr, 0.05+c(0, -.04, 0), levels(Dx), cex=1.2)
# label "outliers"
text((1:nrow(SocialCog))[bad], SC.rlm$weights[bad], 
     labels=rownames(SocialCog)[bad], pos=c(2,4,4))

Analysis of Social Cognition data

Michael Friendly, Matthew Sigal

08 Jun 2016

Load packages and the data

Fit the MANOVA model, test hypotheses

HE plots

Canonical discriminant analysis

Examine model residuals in a chi-square QQ plot

Refit the model, deleting that case

HE plots of revised model

Canonical discriminant analysis

Robust MLM

Plot the weights from the robust MLM