###Process NPP data

##Download NPP data
#Download zip file containing England & Wales and Great Britain principal 2018-based NPPs
#(the England & Wales file extracted here is the XML file "ew_ppp_opendata2018.xml")
url <- "https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationprojections/datasets/z2zippedpopulationprojectionsdatafilesgbandenglandandwales/2018based/tablez2opendata18ewandgb.zip"
zip_path <- "data/ONS_2018_NPP_GB_EW.zip"

#Make sure the destination folder exists, otherwise the download cannot be written
dir.create("data", showWarnings = FALSE, recursive = TRUE)
curl_download(url, destfile = zip_path)

#Extract England & Wales XML file
#The file is looked up by name rather than by its position in the archive, so a
#change in the ordering of the archive cannot silently extract a different file
filenames <- as.character(unzip(zip_path, list = TRUE)$Name)
ew_file <- filenames[grepl("^ew_ppp_opendata2018\\.xml$", basename(filenames),
                           ignore.case = TRUE)]
if (length(ew_file) != 1) {
  stop("Expected exactly one 'ew_ppp_opendata2018.xml' in ", zip_path,
       " but the archive contains: ", paste(filenames, collapse = ", "),
       call. = FALSE)
}
unzip(zip_path, files = ew_file, exdir = "data")

#Delete zip file
unlink(zip_path)

#MANUAL STEP: open the XML file "ew_ppp_opendata2018.xml" (saved in "data") within Excel and
#save it as an xlsx file with file name "ew_ppp_opendata2018.xlsx" in the "data" folder

#Delete XML file
#Only done once the converted xlsx file exists, so that running the script straight
#through does not discard the XML before the manual conversion has been carried out
if (!file.exists("data/ew_ppp_opendata2018.xlsx")) {
  stop("'data/ew_ppp_opendata2018.xlsx' not found: open 'data/ew_ppp_opendata2018.xml' ",
       "in Excel and save it as an xlsx file before continuing",
       call. = FALSE)
}
unlink("data/ew_ppp_opendata2018.xml")


##Process NPP population projections
#Load the block of cells holding the midyear population projections for
#females aged 15-44 (one row per age) for 2019-2050 (one column per year)
npp <- read_xlsx(
  "data/ew_ppp_opendata2018.xlsx",
  sheet = "Population",
  range = "D124:AI153",
  col_names = FALSE,
  col_types = "numeric"
)

#Label each column with its projection year
names(npp) <- as.character(2019:2050)

#Prepend the single-year age that each row refers to
npp <- cbind(age = 15:44, npp)

#Reshape to long format: one row per age and year (yr) with the population
#estimate (N), then derive the birth cohort (coh) and sort by cohort and age
npp <- npp %>%
  pivot_longer(cols = as.character(2019:2050), names_to = "yr", values_to = "N") %>%
  mutate(yr = as.numeric(yr), coh = yr - age) %>%
  arrange(coh, age)


##Process NPP fertility rate projections
#Read in fertility rate projections for females aged 15-44 for the years 2018-2050
#(2018 is only needed to compute the averaged rate for 2019 below)
npp2 <- read_xlsx("data/ew_ppp_opendata2018.xlsx", sheet = "Fertility_assumptions",
                  range = "C2:AI31", col_names = FALSE, col_types = "numeric")

#Rename columns
names(npp2) <- paste0(2018:2050)

#Add age column
npp2 <- cbind(age = 15:44, npp2)

#Convert into long data frame with age, year (yr), fertility rate estimate (x) and cohort (coh) columns
npp2 %<>% pivot_longer(cols = paste0(2018:2050), names_to = "yr", values_to = "x") %>%
  mutate(yr = as.numeric(yr)) %>% mutate(coh = yr - age) %>% arrange(coh, age)

#Compute the fertility rate projection for each year as the average of the rates
#labelled with the previous year and with that year, separately for each age
#(as read in, the rate labelled with a given year corresponds to the period from that
#year to the next, i.e. the end of the year rather than the middle of the year as desired)
#The lag is explicitly ordered by year so it does not depend on the current row order,
#the first year (2018) has no previous year and is dropped, and the grouping is removed
#afterwards so that the saved data frame is ungrouped like npp
npp2 %<>%
  group_by(age) %>%
  mutate(x = (x + lag(x, order_by = yr)) / 2) %>%
  ungroup() %>%
  filter(!is.na(x))

#Divide fertility rates by 1000 to express as true rates
#(presumably the source rates are per 1,000 women - confirm against the ONS file)
npp2 %<>% mutate(x = x / 1000)


##Save NPP data
#Write the population (npp) and fertility rate (npp2) data frames to one RData file
save(list = c("npp", "npp2"), file = "data/ew_npp_2018.RData")
