This example shows the development of plots of Andre-Michel Guerry’s (1833) data on crime and literacy in France ## Load packages

library(ggplot2)
library(ggrepel)

Load the data

data(Guerry, package="Guerry")
head(Guerry[, 1:9])
##   dept Region   Department Crime_pers Crime_prop Literacy Donations Infants
## 1    1      E          Ain      28870      15890       37      5098   33120
## 2    2      N        Aisne      26226       5521       51      8901   14572
## 3    3      C       Allier      26747       7925       13     10973   17044
## 4    4      E Basses-Alpes      12935       7289       46      2733   23018
## 5    5      E Hautes-Alpes      17488       8174       69      6962   23076
## 6    7      S      Ardeche       9474      10263       27      3188   42117
##   Suicides
## 1    35039
## 2    12831
## 3   114121
## 4    14238
## 5    16171
## 6    52547

Start with basic scatterplot

ggplot(aes(x=Literacy, y=Crime_pers/1000), data=Guerry) +
  geom_point(size=2) 

# ggsave("guerry-crime1.png", width=5, height=5)

Add data ellipses

Data ellipses give a visual assessment of the correlation between variables

ggplot(aes(x=Literacy, y=Crime_pers/1000), data=Guerry) +
  geom_point(size=2) +
  stat_ellipse(level=0.68, color="blue", size=1.2) +  
  stat_ellipse(level=0.95, color="gray", size=1, linetype=2) 

# ggsave("guerry-crime2.png", width=5, height=5)

Add smooths

ggplot(aes(x=Literacy, y=Crime_pers/1000), data=Guerry) +
  geom_point(size=2) +
  stat_ellipse(level=0.68, color="blue", size=1.2) +  
  stat_ellipse(level=0.95, color="gray", size=1, linetype=2) + 
  geom_smooth(method="lm", formula=y~x, fill="lightblue") +
  geom_smooth(method="loess", formula=y~x, color="red", se=FALSE) 

# ggsave("guerry-crime3.png", width=5, height=5)

Do some styling

gplot <- last_plot()
gplot + theme_bw(base_size = 18) 

Put the steps together

ggplot(aes(x=Literacy, y=Crime_pers/1000), data=Guerry) +
  geom_point(size=2) +
  stat_ellipse(level=0.68, color="blue", size=1.2) +  
  stat_ellipse(level=0.95, color="gray", size=1, linetype=2) + 
  geom_smooth(method="lm", formula=y~x, fill="lightblue") +
  geom_smooth(method="loess", formula=y~x, color="red", se=FALSE) +
  theme_bw() + theme(text = element_text(size=18))

# ggsave("guerry-crime4.png", width=5, height=5)

Use Mahalanobis Dsq to label unusual points

gdf <- Guerry[, c("Literacy", "Crime_pers", "Department")]
gdf$dsq <- heplots::Mahalanobis(gdf[, 1:2])

ggplot(aes(x=Literacy, y=Crime_pers/1000), data=Guerry) +
  geom_point(size=2) +
  stat_ellipse(level=0.68, color="blue", size=1.2) +  
  stat_ellipse(level=0.95, color="gray", size=1, linetype=2) + 
  geom_smooth(method="lm", formula=y~x, fill="lightblue") +
  geom_smooth(method="loess", formula=y~x, color="red", se=FALSE) +
    geom_label_repel(aes(label=Department), data = gdf[gdf$dsq > 4.6,]) +
  theme_bw() + theme(text = element_text(size=18))

# ggsave("guerry-crime5.png", width=5, height=5)

gplot + 
     theme_bw() + 
     theme(text = element_text(size=18)) +        
       geom_label_repel(aes(label=Department), data = gdf[gdf$dsq > 4.6,])

Do the same for property crime

gdf <- Guerry[, c("Literacy", "Crime_prop", "Department")]
gdf$dsq <- heplots::Mahalanobis(gdf[, 1:2])

ggplot(aes(x=Literacy, y=Crime_prop/1000), data=Guerry) +
  geom_point(size=2) +
  stat_ellipse(level=0.68, color="blue", size=1.2) +  
  stat_ellipse(level=0.95, color="gray", size=1, linetype=2) + 
  geom_smooth(method="lm", formula=y~x, fill="lightblue") +
  geom_smooth(method="loess", formula=y~x, color="red", se=FALSE) +
    geom_label_repel(aes(label=Department), data = gdf[gdf$dsq > 4.6,]) +
  theme_bw() + theme(text = element_text(size=18))

# ggsave("guerry-crime-prop.png", width=5, height=5)
IycgLS0tDQojJyB0aXRsZTogIkd1ZXJyeSdzIGRhdGEgb24gY3JpbWUgYW5kIGxpdGVyYWN5Ig0KIycgYXV0aG9yOiAiTWljaGFlbCBGcmllbmRseSINCiMnIGNhdGVnb3J5OiAiZ2dwbG90MiINCiMnIGRhdGU6ICJgciBmb3JtYXQoU3lzLkRhdGUoKSlgIg0KIycgb3V0cHV0Og0KIycgICBodG1sX2RvY3VtZW50Og0KIycgICAgIHRoZW1lOiByZWFkYWJsZQ0KIycgICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCiMnICAgd29yZF9kb2N1bWVudDogZGVmYXVsdCAgICANCiMnIC0tLQ0KDQojKyBlY2hvPUZBTFNFDQprbml0cjo6b3B0c19jaHVuayRzZXQod2FybmluZz1GQUxTRSwgDQogICAgICAgICAgICAgICAgICAgICAgbWVzc2FnZT1GQUxTRSwgDQogICAgICAgICAgICAgICAgICAgICAgUi5vcHRpb25zPWxpc3QoZGlnaXRzPTQpLA0KICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDUsDQogICAgICAgICAgICAgICAgICAgICAgZmlnLmhlaWdodCA9IDUpDQoNCiMnIFRoaXMgZXhhbXBsZSBzaG93cyB0aGUgZGV2ZWxvcG1lbnQgb2YgcGxvdHMgb2YgQW5kcmUtTWljaGVsIEd1ZXJyeSdzICgxODMzKSBkYXRhIG9uIGNyaW1lIGFuZCBsaXRlcmFjeSBpbiBGcmFuY2UNCg0KIycgIyMgTG9hZCBwYWNrYWdlcw0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShnZ3JlcGVsKQ0KDQojJyAjIyBMb2FkIHRoZSBkYXRhDQpkYXRhKEd1ZXJyeSwgcGFja2FnZT0iR3VlcnJ5IikNCmhlYWQoR3VlcnJ5WywgMTo5XSkNCg0KDQojJyAjIyBTdGFydCB3aXRoIGJhc2ljIHNjYXR0ZXJwbG90IA0KZ2dwbG90KGFlcyh4PUxpdGVyYWN5LCB5PUNyaW1lX3BlcnMvMTAwMCksIGRhdGE9R3VlcnJ5KSArDQogIGdlb21fcG9pbnQoc2l6ZT0yKSANCiMgZ2dzYXZlKCJndWVycnktY3JpbWUxLnBuZyIsIHdpZHRoPTUsIGhlaWdodD01KQ0KDQojJyAjIyBBZGQgZGF0YSBlbGxpcHNlcw0KIycgRGF0YSBlbGxpcHNlcyBnaXZlIGEgdmlzdWFsIGFzc2Vzc21lbnQgb2YgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gdmFyaWFibGVzDQpnZ3Bsb3QoYWVzKHg9TGl0ZXJhY3ksIHk9Q3JpbWVfcGVycy8xMDAwKSwgZGF0YT1HdWVycnkpICsNCiAgZ2VvbV9wb2ludChzaXplPTIpICsNCiAgc3RhdF9lbGxpcHNlKGxldmVsPTAuNjgsIGNvbG9yPSJibHVlIiwgc2l6ZT0xLjIpICsgIA0KICBzdGF0X2VsbGlwc2UobGV2ZWw9MC45NSwgY29sb3I9ImdyYXkiLCBzaXplPTEsIGxpbmV0eXBlPTIpIA0KIyBnZ3NhdmUoImd1ZXJyeS1jcmltZTIucG5nIiwgd2lkdGg9NSwgaGVpZ2h0PTUpDQogIA0KIycgIyMgQWRkIHNtb290aHMNCg0KZ2dwbG90KGFlcyh4PUxpdGVyYWN5LCB5PUNyaW1lX3BlcnMvMTAwMCksIGRhdGE9R3VlcnJ5KSArDQogIGdlb21fcG9pbnQoc2l6ZT0yKSArDQogIHN0YXRfZWxsaXBzZShsZXZlbD0wLjY4LCBjb2xvcj0iYmx1ZSIsIHNpemU9MS4yKSArICANCiAgc3RhdF9lbGxpcHNlKGxldmVsPTAuOTUsIGNvbG9yPSJncmF5Iiwgc2l6ZT0xLCBsaW5ldHlwZT0yKSArIA0KICBnZW9tX3Ntb290aChtZXRob2Q9ImxtIiwgZm9ybXVsYT15fngsIGZpbGw9ImxpZ2h0Ymx1ZSIpICsNCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSJsb2VzcyIsIGZvcm11bGE9eX54LCBjb2xvcj0icmVkIiwgc2U9RkFMU0UpIA0KIyBnZ3NhdmUoImd1ZXJyeS1jcmltZTMucG5nIiwgd2lkdGg9NSwgaGVpZ2h0PTUpDQoNCiMnICMjIERvIHNvbWUgc3R5bGluZyAgDQpncGxvdCA8LSBsYXN0X3Bsb3QoKQ0KZ3Bsb3QgKyB0aGVtZV9idyhiYXNlX3NpemUgPSAxOCkgDQoNCiMnICMjIFB1dCB0aGUgc3RlcHMgdG9nZXRoZXINCmdncGxvdChhZXMoeD1MaXRlcmFjeSwgeT1DcmltZV9wZXJzLzEwMDApLCBkYXRhPUd1ZXJyeSkgKw0KICBnZW9tX3BvaW50KHNpemU9MikgKw0KICBzdGF0X2VsbGlwc2UobGV2ZWw9MC42OCwgY29sb3I9ImJsdWUiLCBzaXplPTEuMikgKyAgDQogIHN0YXRfZWxsaXBzZShsZXZlbD0wLjk1LCBjb2xvcj0iZ3JheSIsIHNpemU9MSwgbGluZXR5cGU9MikgKyANCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSJsbSIsIGZvcm11bGE9eX54LCBmaWxsPSJsaWdodGJsdWUiKSArDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG9lc3MiLCBmb3JtdWxhPXl+eCwgY29sb3I9InJlZCIsIHNlPUZBTFNFKSArDQogIHRoZW1lX2J3KCkgKyB0aGVtZSh0ZXh0ID0gZWxlbWVudF90ZXh0KHNpemU9MTgpKQ0KIyBnZ3NhdmUoImd1ZXJyeS1jcmltZTQucG5nIiwgd2lkdGg9NSwgaGVpZ2h0PTUpDQoNCg0KDQojJyAjIyBVc2UgTWFoYWxhbm9iaXMgRHNxIHRvIGxhYmVsIHVudXN1YWwgcG9pbnRzDQpnZGYgPC0gR3VlcnJ5WywgYygiTGl0ZXJhY3kiLCAiQ3JpbWVfcGVycyIsICJEZXBhcnRtZW50IildDQpnZGYkZHNxIDwtIGhlcGxvdHM6Ok1haGFsYW5vYmlzKGdkZlssIDE6Ml0pDQoNCmdncGxvdChhZXMoeD1MaXRlcmFjeSwgeT1DcmltZV9wZXJzLzEwMDApLCBkYXRhPUd1ZXJyeSkgKw0KICBnZW9tX3BvaW50KHNpemU9MikgKw0KICBzdGF0X2VsbGlwc2UobGV2ZWw9MC42OCwgY29sb3I9ImJsdWUiLCBzaXplPTEuMikgKyAgDQogIHN0YXRfZWxsaXBzZShsZXZlbD0wLjk1LCBjb2xvcj0iZ3JheSIsIHNpemU9MSwgbGluZXR5cGU9MikgKyANCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSJsbSIsIGZvcm11bGE9eX54LCBmaWxsPSJsaWdodGJsdWUiKSArDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG9lc3MiLCBmb3JtdWxhPXl+eCwgY29sb3I9InJlZCIsIHNlPUZBTFNFKSArDQoJZ2VvbV9sYWJlbF9yZXBlbChhZXMobGFiZWw9RGVwYXJ0bWVudCksIGRhdGEgPSBnZGZbZ2RmJGRzcSA+IDQuNixdKSArDQogIHRoZW1lX2J3KCkgKyB0aGVtZSh0ZXh0ID0gZWxlbWVudF90ZXh0KHNpemU9MTgpKQ0KIyBnZ3NhdmUoImd1ZXJyeS1jcmltZTUucG5nIiwgd2lkdGg9NSwgaGVpZ2h0PTUpDQoNCmdwbG90ICsgDQogICAgIHRoZW1lX2J3KCkgKyANCiAgICAgdGhlbWUodGV4dCA9IGVsZW1lbnRfdGV4dChzaXplPTE4KSkgKyAgICAgICAgDQoJICAgZ2VvbV9sYWJlbF9yZXBlbChhZXMobGFiZWw9RGVwYXJ0bWVudCksIGRhdGEgPSBnZGZbZ2RmJGRzcSA+IDQuNixdKQ0KDQoNCiMnICMjIERvIHRoZSBzYW1lIGZvciBwcm9wZXJ0eSBjcmltZQ0KDQpnZGYgPC0gR3VlcnJ5WywgYygiTGl0ZXJhY3kiLCAiQ3JpbWVfcHJvcCIsICJEZXBhcnRtZW50IildDQpnZGYkZHNxIDwtIGhlcGxvdHM6Ok1haGFsYW5vYmlzKGdkZlssIDE6Ml0pDQoNCmdncGxvdChhZXMoeD1MaXRlcmFjeSwgeT1DcmltZV9wcm9wLzEwMDApLCBkYXRhPUd1ZXJyeSkgKw0KICBnZW9tX3BvaW50KHNpemU9MikgKw0KICBzdGF0X2VsbGlwc2UobGV2ZWw9MC42OCwgY29sb3I9ImJsdWUiLCBzaXplPTEuMikgKyAgDQogIHN0YXRfZWxsaXBzZShsZXZlbD0wLjk1LCBjb2xvcj0iZ3JheSIsIHNpemU9MSwgbGluZXR5cGU9MikgKyANCiAgZ2VvbV9zbW9vdGgobWV0aG9kPSJsbSIsIGZvcm11bGE9eX54LCBmaWxsPSJsaWdodGJsdWUiKSArDQogIGdlb21fc21vb3RoKG1ldGhvZD0ibG9lc3MiLCBmb3JtdWxhPXl+eCwgY29sb3I9InJlZCIsIHNlPUZBTFNFKSArDQoJZ2VvbV9sYWJlbF9yZXBlbChhZXMobGFiZWw9RGVwYXJ0bWVudCksIGRhdGEgPSBnZGZbZ2RmJGRzcSA+IDQuNixdKSArDQogIHRoZW1lX2J3KCkgKyB0aGVtZSh0ZXh0ID0gZWxlbWVudF90ZXh0KHNpemU9MTgpKQ0KDQojIGdnc2F2ZSgiZ3VlcnJ5LWNyaW1lLXByb3AucG5nIiwgd2lkdGg9NSwgaGVpZ2h0PTUpDQoNCg0K