###Plot Figure 1

##Setup
#Load ONS data
load("data/ONS2018_allc.RData")

#Panel labels
supp.labs1 <- c("All", "Parity 0", "Parity 1", "Parity 2", "Parity 3+")
names(supp.labs1) <- c(0, 1, 2, 3, 4)
supp.labs2 <- c("UKHLS", "ONS")
names(supp.labs2) <- c(1, 2)

#Function to aggregate births and exposures by age and cohort
wratesfunc <- function(w, c, a, b, FUN = length) {
  data <- aggregate(w, by = list(a, c), FUN = FUN)
  data2 <- aggregate(w[b == 1], by = list(a[b == 1], c[b == 1]), FUN = FUN)
  data <- left_join(data, data2, by = c("Group.1", "Group.2")) %>%
    mutate(x.y = replace_na(x.y, 0))
  data$x <- data$x.y/data$x.x
  data
}


##Prepare UKHLS data (weighted births and exposures)
datw <- rbind(cbind(type = 1, par = 0, wratesfunc(weights, coh, age, birth.bin)),
               cbind(type = 1, par = 1, wratesfunc(weights_0, coh_0, age_0, birth.bin_0)),
               cbind(type = 1, par = 2, wratesfunc(weights_1, coh_1, age_1, birth.bin_1)),
               cbind(type = 1, par = 3, wratesfunc(weights_2, coh_2, age_2, birth.bin_2)),
               cbind(type = 1, par = 4, wratesfunc(weights_3, coh_3, age_3, birth.bin_3)))
names(datw) <- c("type", "par", "age", "coh", "N", "n", "x")


##Prepare ONS data
dat2 <- data.frame(rbind(cbind(type = 2, par = 0,
                               age = ONS_births_dat[[5]]$Group.1,
                               coh = ONS_births_dat[[5]]$Group.2,
                               N = ONS_expos_dat[[5]]$x,
                               n = ONS_births_dat[[5]]$x,
                               x = ONS_rates_dat[[5]]$x),
                         cbind(type = 2, par = 1,
                               age = ONS_births_dat[[1]]$Group.1,
                               coh = ONS_births_dat[[1]]$Group.2,
                               N = ONS_expos_dat[[1]]$x,
                               n = ONS_births_dat[[1]]$x,
                               x = ONS_rates_dat[[1]]$x),
                         cbind(type = 2, par = 2,
                               age = ONS_births_dat[[2]]$Group.1,
                               coh = ONS_births_dat[[2]]$Group.2,
                               N = ONS_expos_dat[[2]]$x,
                               n = ONS_births_dat[[2]]$x,
                               x = ONS_rates_dat[[2]]$x),
                         cbind(type = 2, par = 3,
                               age = ONS_births_dat[[3]]$Group.1,
                               coh = ONS_births_dat[[3]]$Group.2,
                               N = ONS_expos_dat[[3]]$x,
                               n = ONS_births_dat[[3]]$x,
                               x = ONS_rates_dat[[3]]$x),
                         cbind(type = 2, par = 4,
                               age = ONS_births_dat[[4]]$Group.1,
                               coh = ONS_births_dat[[4]]$Group.2,
                               N = ONS_expos_dat[[4]]$x,
                               n = ONS_births_dat[[4]]$x,
                               x = ONS_rates_dat[[4]]$x)))

#Restrict to 1945-1999 cohorts
dat2 %<>% filter(coh %in% 1945:1999)

#Join datw and dat2
dat3 <- rbind(datw, dat2)

#Construct cohort categorical variable
dat3 %<>% mutate(cohc = ceiling((coh - (min(coh) - 6))/10))

#Aggregate births and exposures across cohort categories
dat4 <- dat3 %>% group_by(type, par, cohc, age) %>%
  summarise(n = sum(n), N = sum(N), Nn = N-n, x = n/N)

#Remove age-cohort combinations where not all cohorts in the category would be observed due to the 
#cutoff (2007 for UKHLS, 2018 for ONS)
dat4 %<>% filter((cohc %in% 1:3 & type == 2)|(cohc %in% 1:2 & type == 1)|
           (cohc == 4 & type == 2 & age <= 39)|(cohc == 5 & type == 2 & age <= 29)|
           (cohc == 6 & type == 2 & age <= 19)|(cohc == 3 & type == 1 & age <= 38)|
           (cohc == 4 & type == 1 & age <= 28)|(cohc == 5 & type == 1 & age <= 18))

#Omit rates with small underlying counts (births or exposures <= 2)
dat4 %<>% filter(n > 2 & Nn > 2)

#Label cohort categories
dat4$cohc <- c("1945-49", "1950-59", "1960-69", "1970-79", "1980-89", "1990-99")[dat4$cohc]

##Plot rates
png(file = "plots/fig1.png", width = 17.5, height = 8, units = "cm", res = 400)
print({dat4 %>% ggplot(aes(x = age, y = x, color = cohc)) +
  geom_point(aes(group = cohc), size = 0.8) +
  labs(x = "Age", y = "Observed fertility rate", color = "Cohort") +
  scale_color_manual(values=hcl.colors(6,"viridis",rev=T)) +
  scale_y_continuous(breaks = seq(0, 0.5, 0.1)) +
  scale_x_continuous(limits = c(15, 44), breaks = seq(15, 40, 5),
                     minor_breaks = setdiff(15:44, seq(15, 40, 5))) +
  coord_cartesian(ylim = c(0, 0.35)) +
  theme_bw() + theme(legend.position = "right", text = element_text("Calibri")) +
  facet_grid(type ~ par, labeller = labeller(par = supp.labs1, type = supp.labs2))})
dev.off()
