###Process UKHLS Wave 1 data

##Setup
#Read in the four tab-delimited Wave 1 files from the data directory 'dir'.
#NOTE(review): 'dir' is not defined in this section; it is assumed to be set
#by whatever runs before this code - confirm.
#header = TRUE is spelled out because T is an ordinary variable that can be
#reassigned; file.path() builds the same "<dir>/<file>" path as paste0() did.
#Information for all persons in the household
a_indall   <- read.table(file.path(dir, "a_indall_protect.tab"), sep = "\t", header = TRUE)
#Substantive data for responding adults (16+)
a_indresp  <- read.table(file.path(dir, "a_indresp_protect.tab"), sep = "\t", header = TRUE)
#Information for all children (0-15 years) in the household
a_child    <- read.table(file.path(dir, "a_child_protect.tab"), sep = "\t", header = TRUE)
#Information about all biological children born to the sample members
a_natchild <- read.table(file.path(dir, "a_natchild_protect.tab"), sep = "\t", header = TRUE)

#Give a_indall and a_natchild a common key 'a_childpno' (person number of child)
#and sort each table by household (a_hidp) and then by that key.
#In a_indall the key is a copy of a_pno; in a_natchild it is a copy of a_lchno.
a_indall <- a_indall %>%
  mutate(a_childpno = a_pno) %>%
  arrange(a_hidp, a_childpno)

a_natchild <- a_natchild %>%
  mutate(a_childpno = a_lchno) %>%
  arrange(a_hidp, a_childpno)


##Create date of birth variables
#Attach the child's year/month of birth as held in a_child, matched on household
#and child person number. The columns are renamed while selecting
#(a_birthych / a_birthmch) so they cannot clash with the merges below.
a_natchild <- a_natchild %>%
  left_join(a_child %>%
              select(a_hidp, a_childpno,
                     a_birthych = a_birthy,
                     a_birthmch = a_birthm),
            by = c("a_hidp", "a_childpno"))

#Attach the child's year/month of birth as held in a_indall, on the same key.
#These are stored as a_birthy_indall / a_birthm_indall, which keeps the plain
#names a_birthy / a_birthm free for the parent's values merged in further down.
a_natchild <- a_natchild %>%
  left_join(a_indall %>%
              select(a_hidp, a_childpno,
                     a_birthy_indall = a_birthy,
                     a_birthm_indall = a_birthm),
            by = c("a_hidp", "a_childpno"))

#Create new date of birth variables for the child: take the first strictly
#positive value in the order a_natchild (a_lchdob*), a_indall, a_child.
#If no source has a positive value the result is NA.
a_natchild <- a_natchild %>%
  mutate(a_dvchdobm = case_when(a_lchdobm       > 0 ~ a_lchdobm,
                                a_birthm_indall > 0 ~ a_birthm_indall,
                                a_birthmch      > 0 ~ a_birthmch),
         a_dvchdoby = case_when(a_lchdoby       > 0 ~ a_lchdoby,
                                a_birthy_indall > 0 ~ a_birthy_indall,
                                a_birthych      > 0 ~ a_birthych))

#Merge in date of birth information and sex of parent from a_indall, this time
#matching on the parent's person number (a_pno) within the household
a_natchild <- a_natchild %>%
  arrange(a_hidp, a_pno) %>%
  left_join(a_indall %>% select(a_hidp, a_pno, a_sex, a_birthy, a_birthm),
            by = c("a_hidp", "a_pno"))

#Build dates of birth expressed in months since 1900.
# - a_dvchdobm: missing child months of birth are replaced with 6.5
# - dob1900:    parent date of birth, set to NA when the parent's year of birth
#               is negative (i.e. treated as missing)
# - dob1900ch:  child date of birth, using the month after the 6.5 replacement
#NOTE(review): a negative a_birthm combined with a valid a_birthy still feeds
#into dob1900 unchanged - confirm this is intended.
a_natchild <- a_natchild %>%
  mutate(a_dvchdobm = replace(a_dvchdobm, is.na(a_dvchdobm), 6.5),
         dob1900    = replace((a_birthy - 1900) * 12 + a_birthm, a_birthy < 0, NA),
         dob1900ch  = (a_dvchdoby - 1900) * 12 + a_dvchdobm)

#Per parent (a_hidp, a_pno), with rows sorted by child date of birth:
# - childno2: child number recreated from date-of-birth order (1 = first row
#             after sorting; rows with NA dob1900ch sort last)
# - nmisdoby: number of that parent's children with a missing year of birth
#row_number() replaces 1:n(), which would produce c(1, 0) for a group of size 0.
#Both variables are computed in one grouped pass; sorting by
#(a_hidp, a_pno, dob1900ch) already leaves the rows ordered by (a_hidp, a_pno),
#so no second sort is needed.
a_natchild <- a_natchild %>%
  arrange(a_hidp, a_pno, dob1900ch) %>%
  group_by(a_hidp, a_pno) %>%
  mutate(childno2 = row_number(),
         nmisdoby = sum(is.na(a_dvchdoby))) %>%
  ungroup()

#Create indicator for person IDs in a_natchild (constant 1 on every row)
a_natchild$pres <- 1


##Convert to wide format
#Columns that are not stacked in the long step: the parent-level variables plus
#childno2, which supplies the suffix of the wide column names
vars_fixed <- c("pidp", "a_sex", "dob1900", "a_birthm", "a_birthy", "nmisdoby", "pres", "childno2")

#Working subset of a_natchild: the fixed columns plus the three child variables
tmp <- select(a_natchild,
              pidp, a_sex, dob1900, a_birthm, a_birthy, dob1900ch,
              a_dvchdobm, a_dvchdoby, nmisdoby, pres, childno2)

#Stack the three child variables (dob1900ch, a_dvchdobm, a_dvchdoby) into
#name/value pairs, then spread them into one column per variable and child
#number (named <variable>_<childno2>), and sort the result by person ID
a_natchild_wide <- tmp %>%
  pivot_longer(cols = -all_of(vars_fixed),
               names_to = "name", values_to = "value") %>%
  pivot_wider(names_from = c(name, childno2), values_from = value) %>%
  arrange(pidp)


##Create highest qualification variable
#Merge a_indresp with a_natchild_wide on person ID. a_sex, a_birthm and a_birthy
#are removed from a_natchild_wide first, so those three columns are not brought
#in from the right-hand table.
a_indresp <- left_join(a_indresp,
                       select(a_natchild_wide, -a_sex, -a_birthm, -a_birthy),
                       by = "pidp")

#Recode highest qualification (a_hiqual_dv, refined by a_qfhigh when
#a_hiqual_dv is 5) into 'qualf'. Branches are tested from top to bottom and the
#first one that holds wins.
a_indresp <- a_indresp %>%
  mutate(qualf = case_when(
    a_hiqual_dv == 9                       ~ "Less than O Level",
    a_hiqual_dv == 5 & a_qfhigh %in% 13:15 ~ "Less than O Level",
    a_hiqual_dv == 4                       ~ "O Level",
    a_hiqual_dv == 3                       ~ "A Level",
    a_hiqual_dv %in% 1:2                   ~ "Degree",
    #Everything not matched above is "Not known". This includes a_hiqual_dv of 5
    #with a_qfhigh equal to 96 or negative, negative a_hiqual_dv, and missing values.
    TRUE                                   ~ "Not known"))

#Create 'age left education' variable from a_scend and a_feend, counting only
#strictly positive values as valid:
# - both valid and a_scend is the larger -> a_scend
# - otherwise a_feend if it is valid
# - otherwise a_scend if it is valid
# - neither valid -> NA (no branch matches)
a_indresp <- a_indresp %>%
  mutate(agelft = case_when(
    #a_scend > a_feend > 0 already implies a_scend > 0
    a_feend > 0 & a_scend > a_feend ~ a_scend,
    a_feend > 0                     ~ a_feend,
    a_scend > 0                     ~ a_scend))

#Create two further qualification variables:
# - qualf_rev: as 'qualf', except that "Not known" values are filled in from
#              'agelft' where it is available (<16, 16-17, 18-19, >19)
# - qualfadj:  as 'qualf_rev', except that "Degree" is recoded to "A Level" when
#              'agelft' is known and lies outside 18-28 (below 18 or above 28)
#Rows with missing 'agelft' keep their existing value in both steps.
a_indresp <- a_indresp %>%
  mutate(
    qualf_rev = case_when(
      #Anything other than "Not known" is carried over unchanged
      !(qualf %in% "Not known") ~ qualf,
      agelft < 16               ~ "Less than O Level",
      agelft %in% 16:17         ~ "O Level",
      agelft %in% 18:19         ~ "A Level",
      agelft > 19               ~ "Degree",
      TRUE                      ~ qualf),
    qualfadj = case_when(
      qualf_rev == "Degree" & !(agelft >= 18 & agelft <= 28) ~ "A Level",
      TRUE                                                   ~ qualf_rev))

#Select women: keep only rows where a_sex equals 2 (rows with a missing a_sex
#are dropped by filter() as well)
a_indresp <- filter(a_indresp, a_sex == 2)
