Load packages

library(dplyr)
library(ggplot2)
library(car)
library(effects)
library(palmerpenguins)

Clean up variable names, get rid of NAs, etc.

Remove units from variable names (for display purposes). Character variables should be factors.

peng <- penguins %>%
    rename(
         bill_length = bill_length_mm, 
         bill_depth = bill_depth_mm, 
         flipper_length = flipper_length_mm, 
         body_mass = body_mass_g
         ) %>%
  mutate(species = as.factor(species),
         island = as.factor(island),
         sex = as.factor(substr(sex,1,1))) %>%
  filter(!is.na(bill_depth),
         !is.na(sex))

str(peng)
## tibble [333 x 8] (S3: tbl_df/tbl/data.frame)
##  $ species       : Factor w/ 3 levels "Adelie","Chinstrap",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ island        : Factor w/ 3 levels "Biscoe","Dream",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ bill_length   : num [1:333] 39.1 39.5 40.3 36.7 39.3 38.9 39.2 41.1 38.6 34.6 ...
##  $ bill_depth    : num [1:333] 18.7 17.4 18 19.3 20.6 17.8 19.6 17.6 21.2 21.1 ...
##  $ flipper_length: int [1:333] 181 186 195 193 190 181 195 182 191 198 ...
##  $ body_mass     : int [1:333] 3750 3800 3250 3450 3650 3625 4675 3200 3800 4400 ...
##  $ sex           : Factor w/ 2 levels "f","m": 2 1 1 1 2 1 2 1 2 2 ...
##  $ year          : int [1:333] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...

Function to reproduce ggplot2 default colors

ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}

Scatterplot matrix

car::scatterplotMatrix with data ellipses and regression lines by species

scatterplotMatrix(~ bill_length + bill_depth + flipper_length + body_mass | species,
                  data=peng,
                  col = ggplotColours(3),
                  ellipse=list(levels=0.68))

Faceted plot by island

Assume our main interest is in understanding how bill_length is related to other variables. We can easily include categorical predictors (sex and island) using color and faceting.

ggplot(peng, aes(x=body_mass, y=bill_length, color=sex)) + 
  geom_point() + 
  geom_smooth(method="lm", formula = "y ~ x") +
  facet_grid(~ island) +
  theme_bw()

Quick checks for non-linear relations

Fitting a quadratic with geom_smooth() is one simple way to check for non-linearity. Another is to use a method="loess" smoother.

ggplot(peng, aes(x=body_mass, y=bill_length, color=sex)) + 
  geom_point() + 
  geom_smooth(method="lm", formula = "y ~ poly(x,2)") +
  facet_grid(~ island) +
  theme_bw()

Do the same, but facet by island & species

ggplot(peng, aes(x=body_mass, y=bill_length, color=sex, shape=species)) + 
  geom_point() + 
  geom_smooth(method="lm", formula = "y ~ poly(x,2)") +
  facet_wrap(island ~ species) +
  theme_bw()

Fit a model for bill_length

Here, use body_mass as main quantitative predictor, but control for sex, species and island

peng.mod0 <- lm(bill_length ~ body_mass + sex + species + island, data=peng)
Anova(peng.mod0)
## Anova Table (Type II tests)
## 
## Response: bill_length
##           Sum Sq  Df F value  Pr(>F)    
## body_mass    100   1   19.61 1.3e-05 ***
## sex          251   1   49.12 1.4e-11 ***
## species     3572   2  349.25 < 2e-16 ***
## island         9   2    0.89    0.41    
## Residuals   1667 326                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Try an interaction term

peng.mod1 <- lm(bill_length ~ body_mass * sex + species + island, data=peng)
Anova(peng.mod1)
## Anova Table (Type II tests)
## 
## Response: bill_length
##               Sum Sq  Df F value  Pr(>F)    
## body_mass        100   1   19.55 1.3e-05 ***
## sex              251   1   48.97 1.5e-11 ***
## species         3527   2  343.76 < 2e-16 ***
## island             9   2    0.89    0.41    
## body_mass:sex      0   1    0.00    0.96    
## Residuals       1667 325                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test all interactions

This is a simple screening test to check whether any interactions with body_mass are important.

peng.mod2 <- lm(bill_length ~ body_mass * (sex + species + island), data=peng)
Anova(peng.mod2)
## Anova Table (Type II tests)
## 
## Response: bill_length
##                   Sum Sq  Df F value  Pr(>F)    
## body_mass            100   1   19.69 1.3e-05 ***
## sex                  256   1   50.25 8.6e-12 ***
## species             3383   2  331.98 < 2e-16 ***
## island                 9   2    0.91    0.40    
## body_mass:sex          5   1    1.04    0.31    
## body_mass:species     22   2    2.12    0.12    
## body_mass:island       2   2    0.17    0.84    
## Residuals           1636 321                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Compare models

Anova() tests the additional contribution of each model over the previous one.

anova(peng.mod0, peng.mod1, peng.mod2)
## Analysis of Variance Table
## 
## Model 1: bill_length ~ body_mass + sex + species + island
## Model 2: bill_length ~ body_mass * sex + species + island
## Model 3: bill_length ~ body_mass * (sex + species + island)
##   Res.Df  RSS Df Sum of Sq    F Pr(>F)
## 1    326 1667                         
## 2    325 1667  1      0.01 0.00   0.96
## 3    321 1636  4     31.44 1.54   0.19

Effect plots

By default, this plots the mean response (bill_length) all the high-order terms in the model (species, island, and body_mass * sex), averaged over all other variables not shown in a given plot.

peng.eff <- allEffects(peng.mod1)
plot(peng.eff)

IycgLS0tDQojJyB0aXRsZTogIkxpbmVhciBtb2RlbHMgZm9yIHBlbmd1aW5zIGRhdGEiDQojJyBhdXRob3I6ICJNaWNoYWVsIEZyaWVuZGx5Ig0KIycgZGF0ZTogIjExIEZlYiAyMDIxIg0KIycgb3V0cHV0Og0KIycgICBodG1sX2RvY3VtZW50Og0KIycgICAgIHRoZW1lOiByZWFkYWJsZQ0KIycgICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCiMnIC0tLQ0KDQojKyBlY2hvPUZBTFNFDQprbml0cjo6b3B0c19jaHVuayRzZXQod2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRSwgUi5vcHRpb25zPWxpc3QoZGlnaXRzPTQpKQ0KDQojJyAjIyBMb2FkIHBhY2thZ2VzDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShjYXIpDQpsaWJyYXJ5KGVmZmVjdHMpDQpsaWJyYXJ5KHBhbG1lcnBlbmd1aW5zKQ0KDQojJyAjIyBDbGVhbiB1cCB2YXJpYWJsZSBuYW1lcywgZ2V0IHJpZCBvZiBOQXMsIGV0Yy4NCiMnIA0KIycgUmVtb3ZlIHVuaXRzIGZyb20gdmFyaWFibGUgbmFtZXMgKGZvciBkaXNwbGF5IHB1cnBvc2VzKS4gQ2hhcmFjdGVyIHZhcmlhYmxlcyBzaG91bGQgYmUgZmFjdG9ycy4NCnBlbmcgPC0gcGVuZ3VpbnMgJT4lDQoJcmVuYW1lKA0KICAgICAgICAgYmlsbF9sZW5ndGggPSBiaWxsX2xlbmd0aF9tbSwgDQogICAgICAgICBiaWxsX2RlcHRoID0gYmlsbF9kZXB0aF9tbSwgDQogICAgICAgICBmbGlwcGVyX2xlbmd0aCA9IGZsaXBwZXJfbGVuZ3RoX21tLCANCiAgICAgICAgIGJvZHlfbWFzcyA9IGJvZHlfbWFzc19nDQogICAgICAgICApICU+JQ0KICBtdXRhdGUoc3BlY2llcyA9IGFzLmZhY3RvcihzcGVjaWVzKSwNCiAgICAgICAgIGlzbGFuZCA9IGFzLmZhY3Rvcihpc2xhbmQpLA0KICAgICAgICAgc2V4ID0gYXMuZmFjdG9yKHN1YnN0cihzZXgsMSwxKSkpICU+JQ0KICBmaWx0ZXIoIWlzLm5hKGJpbGxfZGVwdGgpLA0KICAgICAgICAgIWlzLm5hKHNleCkpDQoNCnN0cihwZW5nKQ0KDQojJyAjIyBGdW5jdGlvbiB0byByZXByb2R1Y2UgZ2dwbG90MiBkZWZhdWx0IGNvbG9ycw0KZ2dwbG90Q29sb3VycyA8LSBmdW5jdGlvbihuID0gNiwgaCA9IGMoMCwgMzYwKSArIDE1KXsNCiAgaWYgKChkaWZmKGgpICUlIDM2MCkgPCAxKSBoWzJdIDwtIGhbMl0gLSAzNjAvbg0KICBoY2woaCA9IChzZXEoaFsxXSwgaFsyXSwgbGVuZ3RoID0gbikpLCBjID0gMTAwLCBsID0gNjUpDQp9DQoNCiMnICMjIFNjYXR0ZXJwbG90IG1hdHJpeA0KIycgDQojJyBgY2FyOjpzY2F0dGVycGxvdE1hdHJpeGAgd2l0aCBkYXRhIGVsbGlwc2VzIGFuZCByZWdyZXNzaW9uIGxpbmVzIGJ5IHNwZWNpZXMNCg0KIysgZmlnLmhlaWdodD02LCBmaWcud2lkdGg9Ng0Kc2NhdHRlcnBsb3RNYXRyaXgofiBiaWxsX2xlbmd0aCArIGJpbGxfZGVwdGggKyBmbGlwcGVyX2xlbmd0aCArIGJvZHlfbWFzcyB8IHNwZWNpZXMsDQogICAgICAgICAgICAgICAgICBkYXRhPXBlbmcsDQogICAgICAgICAgICAgICAgICBjb2wgPSBnZ3Bsb3RDb2xvdXJzKDMpLA0KICAgICAgICAgICAgICAgICAgZWxsaXBzZT1saXN0KGxldmVscz0wLjY4KSkNCg0KIycgIyMgRmFjZXRlZCBwbG90IGJ5IGlzbGFuZA0KIycgDQojJyBBc3N1bWUgb3VyIG1haW4gaW50ZXJlc3QgaXMgaW4gdW5kZXJzdGFuZGluZyBob3cgYGJpbGxfbGVuZ3RoYCBpcyByZWxhdGVkIHRvIG90aGVyIHZhcmlhYmxlcy4NCiMnIFdlIGNhbiBlYXNpbHkgaW5jbHVkZSBjYXRlZ29yaWNhbCBwcmVkaWN0b3JzIChgc2V4YCBhbmQgYGlzbGFuZGApIHVzaW5nIGNvbG9yIGFuZCBmYWNldGluZy4NCiMnIA0KIysgZmlnLndpZHRoPTgsIGZpZy5oZWlnaHQ9NA0KZ2dwbG90KHBlbmcsIGFlcyh4PWJvZHlfbWFzcywgeT1iaWxsX2xlbmd0aCwgY29sb3I9c2V4KSkgKyANCiAgZ2VvbV9wb2ludCgpICsgDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iLCBmb3JtdWxhID0gInkgfiB4IikgKw0KICBmYWNldF9ncmlkKH4gaXNsYW5kKSArDQogIHRoZW1lX2J3KCkNCg0KIycgIyMgUXVpY2sgY2hlY2tzIGZvciBub24tbGluZWFyIHJlbGF0aW9ucw0KIycgRml0dGluZyBhIHF1YWRyYXRpYyB3aXRoIGBnZW9tX3Ntb290aCgpYCBpcyBvbmUgc2ltcGxlIHdheSB0byBjaGVjayBmb3Igbm9uLWxpbmVhcml0eS4NCiMnIEFub3RoZXIgaXMgdG8gdXNlIGEgYG1ldGhvZD0ibG9lc3MiYCBzbW9vdGhlci4NCg0KIysgZmlnLndpZHRoPTgsIGZpZy5oZWlnaHQ9NA0KZ2dwbG90KHBlbmcsIGFlcyh4PWJvZHlfbWFzcywgeT1iaWxsX2xlbmd0aCwgY29sb3I9c2V4KSkgKyANCiAgZ2VvbV9wb2ludCgpICsgDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG0iLCBmb3JtdWxhID0gInkgfiBwb2x5KHgsMikiKSArDQogIGZhY2V0X2dyaWQofiBpc2xhbmQpICsNCiAgdGhlbWVfYncoKQ0KDQojJyBEbyB0aGUgc2FtZSwgYnV0IGZhY2V0IGJ5IGlzbGFuZCAmIHNwZWNpZXMNCiMrIGZpZy53aWR0aD04DQpnZ3Bsb3QocGVuZywgYWVzKHg9Ym9keV9tYXNzLCB5PWJpbGxfbGVuZ3RoLCBjb2xvcj1zZXgsIHNoYXBlPXNwZWNpZXMpKSArIA0KICBnZW9tX3BvaW50KCkgKyANCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSJsbSIsIGZvcm11bGEgPSAieSB+IHBvbHkoeCwyKSIpICsNCiAgZmFjZXRfd3JhcChpc2xhbmQgfiBzcGVjaWVzKSArDQogIHRoZW1lX2J3KCkNCg0KDQojJyAjIyBGaXQgYSBtb2RlbCBmb3IgYGJpbGxfbGVuZ3RoYA0KDQojJyBIZXJlLCB1c2UgYGJvZHlfbWFzc2AgYXMgbWFpbiBxdWFudGl0YXRpdmUgcHJlZGljdG9yLCBidXQgY29udHJvbCBmb3IgYHNleGAsIGBzcGVjaWVzYCBhbmQgYGlzbGFuZGANCnBlbmcubW9kMCA8LSBsbShiaWxsX2xlbmd0aCB+IGJvZHlfbWFzcyArIHNleCArIHNwZWNpZXMgKyBpc2xhbmQsIGRhdGE9cGVuZykNCkFub3ZhKHBlbmcubW9kMCkNCg0KIycgIyMjIFRyeSBhbiBpbnRlcmFjdGlvbiB0ZXJtDQpwZW5nLm1vZDEgPC0gbG0oYmlsbF9sZW5ndGggfiBib2R5X21hc3MgKiBzZXggKyBzcGVjaWVzICsgaXNsYW5kLCBkYXRhPXBlbmcpDQpBbm92YShwZW5nLm1vZDEpDQoNCiMnICMjIyBUZXN0IGFsbCBpbnRlcmFjdGlvbnMNCiMnIA0KIycgVGhpcyBpcyBhIHNpbXBsZSBzY3JlZW5pbmcgdGVzdCB0byBjaGVjayB3aGV0aGVyIGFueSBpbnRlcmFjdGlvbnMgd2l0aCBgYm9keV9tYXNzYCBhcmUgaW1wb3J0YW50Lg0KcGVuZy5tb2QyIDwtIGxtKGJpbGxfbGVuZ3RoIH4gYm9keV9tYXNzICogKHNleCArIHNwZWNpZXMgKyBpc2xhbmQpLCBkYXRhPXBlbmcpDQpBbm92YShwZW5nLm1vZDIpDQoNCiMnICMjIyBDb21wYXJlIG1vZGVscw0KIycgDQojJyBgQW5vdmEoKWAgdGVzdHMgdGhlIGFkZGl0aW9uYWwgY29udHJpYnV0aW9uIG9mIGVhY2ggbW9kZWwgb3ZlciB0aGUgcHJldmlvdXMgb25lLg0KYW5vdmEocGVuZy5tb2QwLCBwZW5nLm1vZDEsIHBlbmcubW9kMikNCg0KIycgIyMgRWZmZWN0IHBsb3RzDQojJyANCiMnIEJ5IGRlZmF1bHQsIHRoaXMgcGxvdHMgdGhlIG1lYW4gcmVzcG9uc2UgKGBiaWxsX2xlbmd0aGApIGFsbCB0aGUgaGlnaC1vcmRlciB0ZXJtcyBpbiB0aGUgbW9kZWwNCiMnIChgc3BlY2llc2AsIGBpc2xhbmRgLCBhbmQgYGJvZHlfbWFzcyAqIHNleGApLCAgYXZlcmFnZWQgb3ZlciBhbGwgb3RoZXIgdmFyaWFibGVzIG5vdCBzaG93biBpbiBhIGdpdmVuDQojJyBwbG90Lg0KIycgDQojKyBmaWcud2lkdGg9OCwgZmlnLmhlaWdodD04DQpwZW5nLmVmZiA8LSBhbGxFZmZlY3RzKHBlbmcubW9kMSkNCnBsb3QocGVuZy5lZmYpDQo=