XiaO

Using ggplot2 to do an analysis of Nobel Prize Laureates

XiaO / 2020-04-23


First, let us load all the libraries we need and define a transparent theme that can be reused for every plot below.

library(tidyverse)
library(lubridate) # to handal dates easily
library(viridisLite)

# Transparent ggplot2 theme shared by all figures in this analysis.
#
# Starts from theme_light() and replaces the plot, panel and legend
# backgrounds with transparent rectangles; all grid lines are removed.
#
# Arguments:
#   base_size   Base font size handed to theme_light() (default 12).
#   base_family Font family handed to theme_light(). The default,
#               "Avenir Next", has to be installed on your system; pass a
#               different family (or "" for the device default) if it is not.
#
# Returns: a ggplot2 theme object that can be added to a plot with `+`.
theme_xiao <- function(base_size = 12, base_family = "Avenir Next") {
  theme_light(base_size = base_size, base_family = base_family) %+replace%
    theme(
      # Background of the entire plot
      plot.background = element_rect(fill = "transparent", colour = NA),

      # Background of the plotting area, without any grid lines
      panel.background = element_rect(fill = "transparent", colour = NA),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),

      # Legend: background, key boxes and surrounding box all transparent
      legend.background = element_rect(fill = "transparent", colour = NA),
      legend.key = element_rect(fill = "transparent", colour = NA),
      legend.box.background = element_rect(fill = "transparent", colour = NA)
    )
}

Then read in the Nobel winners data from GitHub (raw.githubusercontent.com) and clean it for further use.

# Download the TidyTuesday (2019-05-14) table of Nobel Prize winners.
# stringsAsFactors = FALSE makes the text columns plain character vectors on
# every R version (it only became the read.csv() default in R 4.0.0).
df <- read.csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-05-14/nobel_winners.csv",
  stringsAsFactors = FALSE
)

nobel_winners <- df %>%
  # Lower-case only the name so that the de-duplication below ignores letter
  # case. The other text columns keep their original spelling: they appear as
  # plot labels, and lower-casing them would break exact matches such as
  # "Individual".
  mutate(full_name = tolower(full_name)) %>%

  # Keep a single row per name / year / category, with all other columns
  distinct(full_name, prize_year, category, .keep_all = TRUE) %>%
  mutate(
    # Decade of the award as a factor, e.g. 1987 -> "1980"
    prize_decade = as.factor(10 * (prize_year %/% 10)),
    # Award year minus birth year
    prize_age = prize_year - year(birth_date),
    # Decade of birth, kept numeric, e.g. 1879 -> 1870
    birth_decade = floor(year(birth_date) / 10) * 10
  )

Figure 1

Distribution of Nobel Prize Laureates’ ages in different decades.

# Figure 1: violin plot with an overlaid box plot of the laureates' age at
# the time of the award, one violin per prize decade.
nobel_winners %>%
  # No group_by() is needed here: ggplot2 ignores dplyr grouping and groups
  # by the discrete x aesthetic on its own.
  ggplot(aes(prize_decade, prize_age, fill = prize_decade)) +
  geom_violin(trim = FALSE) +  # trim = FALSE: do not cut the tails at the data range
  geom_boxplot(width = 0.2, color = "#000000") +
  scale_fill_viridis_d(alpha = 0.8, begin = 0, end = 0.4, option = "D") +
  theme_xiao() +
  theme(legend.position = "none") +  # fill repeats the x axis, so no legend
  labs(title = "Distribution of Nobel Prize Laureates' ages in different decades",
       x = "Decade",
       y = "Age (years)")
en-20200423-1

Figure 2

Distribution of Nobel Prize Laureates’ ages in different subjects.

  # Figure 2: age at the time of the award per prize category, drawn as
  # violins with an overlaid box plot. fct_reorder() orders the categories
  # by its default summary (the median) of prize_age, ignoring missing ages.
  ggplot(
    data = mutate(
      nobel_winners,
      category = fct_reorder(category, prize_age, na.rm = TRUE)
    ),
    mapping = aes(x = category, y = prize_age, fill = category)
  ) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, colour = "#000000") +
  scale_fill_viridis_d("Category", alpha = 0.9, begin = 0, end = 1, option = "D") +
  theme_xiao() +
  theme(legend.position = "none") +
  labs(
    y = "Age (years)",
    x = "Subjects",
    title = "Distribution of Nobel Prize Laureates' ages in different subjects"
  )
en-20200423-2

Figure 3

Number of Nobel Prize Laureates in different decades.

  # Figure 3: lollipop chart of how many laureates were awarded per decade.
  nobel_winners %>%
  count(prize_decade) %>%  # one row per decade, total in column n
  ggplot(aes(x = prize_decade, y = n, color = prize_decade)) +
  geom_segment(aes(xend = prize_decade, yend = 0), size = 1) +  # stem down to y = 0
  geom_point(size = 5) +
  theme_xiao() +
  theme(legend.position = "none") +
  scale_color_viridis_d(alpha = 1, begin = 0, end = 0.8, direction = 1, option = "D") +
  # Fixed 0-150 range without padding, so the stems start on the x axis
  scale_y_continuous(expand = c(0, 0), limits = c(0, 150)) +
  labs(title = "Number of Nobel Prize Laureates in different decades",
       x = "Decades",
       y = "Number of people")
en-20200423-3

Figure 4

Number of Nobel Prize Laureates in different decades by subjects.

# Figure 4: heat map of laureate counts per decade (x) and gender (y),
# with one facet row per prize category.
nobel_winners %>%
  # Turn missing values into an explicit factor level so that those rows
  # are counted and drawn like any other group.
  mutate(gender = fct_explicit_na(gender),
         category = fct_explicit_na(category)) %>%
  count(prize_decade, gender, category) %>%
  ggplot(aes(prize_decade, gender, fill = n)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(n))) +  # print the count inside each tile
  facet_grid(vars(category)) +
  # direction = -1 reverses the order of the viridis palette
  scale_fill_viridis_c("Number", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(title = "Number of Nobel Prize Laureates in different decades by subjects",
       x = "Decades",
       y = "")
en-20200423-4

Figure 5

Number of Nobel Prize Laureates in different decades by gender.

# Figure 5: heat map of laureate counts per decade (x) and prize category (y),
# with one facet row per gender.
nobel_winners %>%
  # Turn missing values into an explicit factor level so that those rows
  # are counted and drawn like any other group.
  mutate(gender = fct_explicit_na(gender),
         category = fct_explicit_na(category)) %>%
  count(prize_decade, gender, category) %>%
  ggplot(aes(prize_decade, category, fill = n)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(n))) +  # print the count inside each tile
  facet_grid(vars(gender)) +
  # direction = -1 reverses the order of the viridis palette
  scale_fill_viridis_c("Number", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(title = "Number of Nobel Prize Laureates in different decades by gender",
       x = "Decades",
       y = "")
en-20200423-5

Figure 6

Subject distribution of Nobel Prize Laureates in different decades by gender.

  # Figure 6: for every decade and gender, the share of laureates that falls
  # into each prize category (the shares add up to 1 per decade and gender).
  nobel_winners %>%
  # Turn missing values into an explicit factor level so that those rows
  # are counted and drawn like any other group.
  mutate(gender = fct_explicit_na(gender),
         category = fct_explicit_na(category)) %>%
  count(prize_decade, category, gender) %>%
  group_by(prize_decade, gender) %>%
  mutate(prop = n / sum(n)) %>%  # share within one decade/gender combination
  ggplot(aes(prize_decade, category, fill = prop)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +  # two decimals
  facet_grid(vars(gender)) +
  # direction = -1 reverses the order of the viridis palette
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0.1, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  labs(title = "Subject distribution of Nobel Prize Laureates in different decades by gender",
       x = "Decades",
       y = "Subjects")
en-20200423-6

Figure 7

Gender distribution of Nobel Prize Laureates in different decades by subjects.

# Figure 7: for every decade and prize category, the share of laureates of
# each gender (the shares add up to 1 per decade and category).
nobel_winners %>%
  # Turn missing values into an explicit factor level so that those rows
  # are counted and drawn like any other group.
  mutate(gender = fct_explicit_na(gender),
         category = fct_explicit_na(category)) %>%
  count(prize_decade, gender, category) %>%
  group_by(prize_decade, category) %>%
  mutate(prop = n / sum(n)) %>%  # share within one decade/category combination
  ggplot(aes(prize_decade, gender, fill = prop)) +
  geom_tile(size = 0.7) +
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +  # two decimals
  facet_grid(vars(category)) +
  # direction = -1 reverses the order of the viridis palette
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0.2, end = 0.8, direction = -1, option = "D") +
  theme_xiao() +
  labs(title = "Gender distribution of Nobel Prize Laureates in different decades by subjects",
       x = "Decades",
       y = "")
en-20200423-7

Figure 8

Time distribution of Nobel Prize Laureates in different subjects by gender.

# Figure 8: for every prize category and gender, how the laureates are spread
# over the decades (the shares add up to 1 per category and gender).
nobel_winners %>%
  # Turn missing values into an explicit factor level so that those rows
  # are counted and drawn like any other group.
  mutate(gender = fct_explicit_na(gender),
         category = fct_explicit_na(category)) %>%
  count(prize_decade, gender, category) %>%
  group_by(category, gender) %>%
  mutate(prop = n / sum(n)) %>%  # share of each decade within one category/gender
  ggplot(aes(prize_decade, gender, fill = prop)) +
  geom_tile() +
  geom_text(aes(label = scales::number(prop, accuracy = .01))) +  # two decimals
  facet_grid(vars(category)) +
  # direction = -1 reverses the order of the viridis palette
  scale_fill_viridis_c("Ratio", alpha = 1, begin = 0, end = 0.9, direction = -1, option = "D") +
  theme_xiao() +
  # The title names what is normalised here: the distribution over time
  labs(title = "Time distribution of Nobel Prize Laureates in different subjects by gender",
       x = "Decades",
       y = "")
en-20200423-8

Figure 9

Born Decades of Nobel Prize Laureates in different subjects

# Figure 9: lollipop charts of the laureates' decade of birth, one panel per
# prize category, with the count printed above every point.
nobel_winners %>%
  mutate(category = fct_explicit_na(category)) %>%
  # Keep individual laureates only. The comparison is case-insensitive so it
  # still matches when the text columns have been lower-cased during cleaning.
  filter(tolower(laureate_type) == "individual") %>%
  count(category, birth_decade) %>%
  ggplot(aes(x = birth_decade, y = n, fill = category, color = category)) +
  geom_segment(aes(xend = birth_decade, yend = 0)) +  # stem down to y = 0
  geom_point(size = 2) +
  theme_xiao() +
  theme(legend.position = "none") +  # the facet strips already name the category
  scale_color_viridis_d(alpha = 0.9, begin = 0, end = 0.9, direction = 1, option = "D") +
  scale_x_continuous(breaks = seq(1810, 1990, 40)) +
  # check_overlap = TRUE skips labels that would overlap an earlier label
  geom_text(aes(label = n),
            vjust = -1,
            position = position_dodge(width = 2),
            size = 3, check_overlap = TRUE) +
  facet_wrap(vars(category)) +
  labs(title = "Born Decades of Nobel Prize Laureates in different subjects",
       x = "Decades",
       y = "People")
en-20200423-9

Figure 10

Average age vs total number of prizes in different subjects

# Figure 10: one point per prize category, showing the total number of prizes
# (x) against the mean age of the laureates (y), with a linear trend line.
nobel_winners %>%
  group_by(category) %>%
  summarise(prize_number = n(),
            # na.rm = TRUE: rows without a computed age are left out of the mean
            mean_prize_age = mean(prize_age, na.rm = TRUE)) %>%
  ggplot(aes(prize_number, mean_prize_age)) +
  geom_smooth(method = "lm", formula = y ~ x, color = "#777777") +
  geom_point(aes(color = category), size = 4) +  # drawn after the trend line, so on top
  scale_color_viridis_d("Subjects") +
  theme_xiao() +
  theme(legend.position = "bottom") +
  labs(title = "Total prize number in different subjects vs average age of Laureates",
       x = "Prize Number",
       y = "Average age")
en-20200423-10

Finally, choose the output format for your figures and save them.

# Open an SVG graphics device with a transparent background. Everything that
# is drawn between this call and dev.off() is written to "Rplots.svg".
# The path is passed positionally so the call does not depend on whether the
# installed svglite version names this argument `file` or `filename`.
svglite::svglite("Rplots.svg", width = 3.6, height = 7, bg = "transparent", pointsize = 20, standalone = TRUE)
# Put the code that draws your figure here, e.g. one of the ggplot pipelines
# above (wrap it in print() when running inside a function or a loop).
dev.off()  # close the device so the file is flushed to disk