Using ggplot2 to do an analysis of Nobel Prize Laureates
XiaO / 2020-04-23
First, let us load all the libraries needed and set a transparent theme which can be used for all the plots generated.
library(tidyverse)
library(lubridate) # to handal dates easily
library(viridisLite)
# Transparent, grid-free variant of theme_light() shared by all figures.
#
# Args:
#   base_size:   base font size handed to theme_light(); defaults to 12.
#   base_family: base font family handed to theme_light(); defaults to
#                "Avenir Next". Set this to the font you want in your figures.
#
# Returns:
#   theme_light() with the elements listed below replaced via %+replace%.
theme_xiao <- function(base_size = 12, base_family = "Avenir Next") {
  theme_light(base_size = base_size, base_family = base_family) %+replace%
    theme(
      # Background of the entire plot
      plot.background = element_rect(fill = "transparent", colour = NA),
      # Background of the plotting area
      panel.background = element_rect(fill = "transparent", colour = NA),
      # No grid lines
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      # Legend pieces are transparent as well
      legend.background = element_rect(fill = "transparent", colour = NA),
      legend.key = element_rect(fill = "transparent", colour = NA),
      legend.box.background = element_rect(fill = "transparent", colour = NA)
    )
}
Then read in the Nobel winners data from the raw GitHub file and clean it for further use.
# Download the Nobel winners table and derive the columns used by the figures:
#   prize_decade - decade of the award, as a factor
#   prize_age    - award year minus birth year
#   birth_decade - decade of birth, numeric
df <- read.csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-14/nobel_winners.csv")
nobel_winners <- df %>%
  # Lower-case every character column.
  # NOTE(review): when read.csv() returns strings as character (the default
  # since R 4.0) this touches all text columns, so later comparisons against
  # them need lower-case values - confirm against downstream filters.
  mutate(across(where(is.character), tolower)) %>%
  # Keep the first row per (full_name, prize_year, category); .keep_all
  # retains the remaining columns of that row.
  distinct(full_name, prize_year, category, .keep_all = TRUE) %>%
  mutate(
    prize_decade = as.factor(10 * (prize_year %/% 10)),
    prize_age = prize_year - year(birth_date),
    # Same integer-division form as prize_decade, kept numeric
    birth_decade = 10 * (year(birth_date) %/% 10)
  )
Figure 1
Distribution of Nobel Prize Laureates’ ages in different decades.
# Figure 1: violin plots with an overlaid box plot of prize_age for each
# prize_decade. The fill colour follows the decade; the legend is hidden
# because the x axis already carries that information.
nobel_winners %>%
  ggplot(aes(prize_decade, prize_age, fill = prize_decade)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, color = "#000000") +
  scale_fill_viridis_d(alpha = 0.8, begin = 0, end = 0.4, option = "D") +
  theme_xiao() +
  # Must come after theme_xiao(), which is a complete theme
  theme(legend.position = "none") +
  labs(
    title = "Distribution of Nobel Prize Laureates' ages in different decades",
    x = "Decade",
    y = "Age (years)"
  )
Figure 2
Distribution of Nobel Prize Laureates’ ages in different subjects.
# Figure 2: violin plots with an overlaid box plot of prize_age for each prize
# category. Categories are re-levelled with fct_reorder() on prize_age
# (missing ages ignored), which sets their order along the x axis.
ggplot(
  data = mutate(
    nobel_winners,
    category = fct_reorder(category, prize_age, na.rm = TRUE)
  ),
  mapping = aes(x = category, y = prize_age, fill = category)
) +
  geom_violin(trim = FALSE) +
  geom_boxplot(colour = "#000000", width = 0.2) +
  theme_xiao() +
  # Legend hidden: the x axis already names each category
  theme(legend.position = "none") +
  scale_fill_viridis_d(
    name = "Category",
    option = "D",
    begin = 0,
    end = 1,
    alpha = 0.9
  ) +
  labs(
    y = "Age (years)",
    x = "Subjects",
    title = "Distribution of Nobel Prize Laureates' ages in different subjects"
  )
Figure 3
Number and genders of Nobel Prize Laureates in different decades.
# Figure 3: lollipop chart of the number of rows (laureates) per prize decade.
nobel_winners %>%
  count(prize_decade) %>%
  ggplot(aes(x = prize_decade, y = n, color = prize_decade)) +
  # Stem from the baseline (y = 0) up to the count
  geom_segment(aes(xend = prize_decade, yend = 0), size = 1) +
  geom_point(size = 5) +
  theme_xiao() +
  theme(legend.position = "none") +
  scale_color_viridis_d(alpha = 1, begin = 0, end = 0.8, direction = 1, option = "D") +
  # No padding so the stems start exactly on the x axis.
  # NOTE: the y range is fixed to 0-150; a decade with a larger count would
  # fall outside these limits.
  scale_y_continuous(expand = c(0, 0), limits = c(0, 150)) +
  labs(
    title = "Number of Nobel Prize Laureates in different decades",
    x = "Decades",
    y = "Number of people"
  )
Figure 4
Number of Nobel Prize Laureates in different decades by subjects.
# Figure 4: heat map of laureate counts, decade (x) by gender (y), with one
# facet row per prize category. Each tile is labelled with its count.
nobel_winners %>%
  # Turn missing gender/category into an explicit factor level so those rows
  # are counted and drawn instead of being dropped as NA
  mutate(
    gender = fct_explicit_na(gender),
    category = fct_explicit_na(category)
  ) %>%
  count(prize_decade, gender, category) %>%
  ggplot(aes(prize_decade, gender, fill = n)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(n))) +
  facet_grid(vars(category)) +
  scale_fill_viridis_c("Number", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(
    title = "Number of Nobel Prize Laureates in different decades by subjects",
    x = "Decades",
    y = ""
  )
Figure 5
Number of Nobel Prize Laureates in different decades by gender.
# Figure 5: heat map of laureate counts, decade (x) by prize category (y),
# with one facet row per gender. Each tile is labelled with its count.
nobel_winners %>%
  # Turn missing gender/category into an explicit factor level so those rows
  # are counted and drawn instead of being dropped as NA
  mutate(
    gender = fct_explicit_na(gender),
    category = fct_explicit_na(category)
  ) %>%
  count(prize_decade, gender, category) %>%
  ggplot(aes(prize_decade, category, fill = n)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(n))) +
  facet_grid(vars(gender)) +
  scale_fill_viridis_c("Number", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(
    title = "Number of Nobel Prize Laureates in different decades by gender",
    x = "Decades",
    y = ""
  )
Figure 6
Subject distribution of Nobel Prize Laureates in different decades by gender.
# Figure 6: heat map of subject shares, decade (x) by prize category (y), one
# facet row per gender. `prop` is a category's count divided by the total
# count of its (prize_decade, gender) group, so proportions sum to 1 within
# each decade of each gender.
nobel_winners %>%
  mutate(
    gender = fct_explicit_na(gender),
    category = fct_explicit_na(category)
  ) %>%
  count(prize_decade, category, gender) %>%
  group_by(prize_decade, gender) %>%
  mutate(prop = n / sum(n)) %>%
  # Grouping was only needed for the proportion
  ungroup() %>%
  ggplot(aes(prize_decade, category, fill = prop)) +
  geom_tile(size = 0.7) +
  # Label each tile with its proportion rounded to two decimals
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +
  facet_grid(vars(gender)) +
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(
    title = "Subject distribution of Nobel Prize Laureates in different decades by gender",
    x = "Decades",
    y = "Subjects"
  )
Figure 7
Gender distribution of Nobel Prize Laureates in different decades by subjects.
# Figure 7: heat map of gender shares, decade (x) by gender (y), one facet row
# per prize category. `prop` is a gender's count divided by the total count
# of its (prize_decade, category) group, so proportions sum to 1 within each
# decade of each category.
nobel_winners %>%
  mutate(
    gender = fct_explicit_na(gender),
    category = fct_explicit_na(category)
  ) %>%
  count(prize_decade, gender, category) %>%
  group_by(prize_decade, category) %>%
  mutate(prop = n / sum(n)) %>%
  # Grouping was only needed for the proportion
  ungroup() %>%
  ggplot(aes(prize_decade, gender, fill = prop)) +
  geom_tile(size = 0.7) +
  # Label each tile with its proportion rounded to two decimals
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +
  facet_grid(vars(category)) +
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0.2, end = 0.8, direction = -1, option = "D") +
  theme_xiao() +
  labs(
    title = "Gender distribution of Nobel Prize Laureates in different decades by subjects",
    x = "Decades",
    y = ""
  )
Figure 8
Time distribution of Nobel Prize Laureates in different subjects by gender.
# Figure 8: heat map of how each (category, gender) group is spread over time,
# decade (x) by gender (y), one facet row per prize category. `prop` is a
# decade's count divided by the total count of its (category, gender) group,
# so proportions sum to 1 along each row of tiles.
nobel_winners %>%
  mutate(
    gender = fct_explicit_na(gender),
    category = fct_explicit_na(category)
  ) %>%
  count(prize_decade, gender, category) %>%
  group_by(category, gender) %>%
  mutate(prop = n / sum(n)) %>%
  # Grouping was only needed for the proportion
  ungroup() %>%
  ggplot(aes(prize_decade, gender, fill = prop)) +
  geom_tile() +
  # Label each tile with its proportion rounded to two decimals
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +
  facet_grid(vars(category)) +
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  # Title describes the time distribution computed above
  labs(
    title = "Time distribution of Nobel Prize Laureates in different subjects by gender",
    x = "Decades",
    y = ""
  )
Figure 9
Birth decades of Nobel Prize Laureates in different subjects.
# Figure 9: lollipop charts of the number of individual laureates born in each
# decade, one facet per prize category, with the count printed above each
# point.
nobel_winners %>%
  mutate(category = fct_explicit_na(category)) %>%
  # Match case-insensitively: laureate_type may already have been lower-cased
  # by the cleaning step's tolower(), in which case an exact comparison with
  # "Individual" selects no rows and the figure comes out empty.
  filter(tolower(laureate_type) == "individual") %>%
  count(category, birth_decade) %>%
  ggplot(aes(x = birth_decade, y = n, color = category)) +
  # Stem from the baseline (y = 0) up to the count
  geom_segment(aes(xend = birth_decade, yend = 0)) +
  geom_point(size = 2) +
  theme_xiao() +
  theme(legend.position = "none") +
  scale_color_viridis_d(alpha = 0.9, begin = 0, end = 0.9, direction = 1, option = "D") +
  scale_x_continuous(breaks = seq(1810, 1990, 40)) +
  # Count labels above the points; overlapping labels are skipped
  geom_text(
    aes(label = n),
    vjust = -1,
    position = position_dodge(width = 2),
    size = 3,
    check_overlap = TRUE
  ) +
  facet_wrap(vars(category)) +
  labs(
    title = "Born Decades of Nobel Prize Laureates in different subjects",
    x = "Decades",
    y = "People"
  )
Figure 10
Average age vs total number of prizes in different subjects
# Figure 10: one point per prize category, plotting the number of rows in that
# category (x) against the mean prize_age (y, missing ages ignored), with a
# linear fit across the categories.
nobel_winners %>%
  group_by(category) %>%
  summarise(
    prize_number = n(),
    mean_prize_age = mean(prize_age, na.rm = TRUE)
  ) %>%
  ggplot(aes(prize_number, mean_prize_age)) +
  # Fit drawn first so the points sit on top of it
  geom_smooth(method = "lm", formula = y ~ x, color = "#777777") +
  geom_point(aes(color = category), size = 4) +
  scale_color_viridis_d("Subjects") +
  theme_xiao() +
  theme(legend.position = "bottom") +
  labs(
    title = "Total prize number in different subjects vs average age of Laureates",
    x = "Prize Number",
    y = "Average age"
  )
Finally, choose the output format for your figures and save them.
# Open an SVG graphics device with a transparent background, draw the figure
# into it, then close the device so that "Rplots.svg" is written.
# The path is passed positionally: the first argument is named `file` in older
# svglite releases and `filename` in newer ones.
svglite::svglite("Rplots.svg", width = 3.6, height = 7, bg = "transparent", pointsize = 20, standalone = TRUE)
# Put your figure code here, e.g. one of the ggplot pipelines above
# (wrap it in print() when it runs inside a function or a loop).
dev.off()