###Fit integrated GAMs - parity 1

##Setup
#Load ONS data
load("data/ONS2018_allc.RData")

#Source functions
source("r/integrated_functions.r")

back <- FALSE # change to TRUE if performing backtesting


##GAM setup
source("r/p1_setup.r")
betadim <- 3*ncol(B_A2)+ncol(B_C2)+ncol(B_T2)+ncol(B_AC4)
S1 <- S2 <- S3 <- S4 <- S5 <- S6 <- S7 <- matrix(0,betadim,betadim)
S1[1:ncol(B_A2),1:ncol(B_A2)] <- S_A2
S2[(1:ncol(B_A2)+ncol(B_A2)),(1:ncol(B_A2)+ncol(B_A2))] <- S_A2
S3[(1:ncol(B_A2)+2*ncol(B_A2)),(1:ncol(B_A2)+2*ncol(B_A2))] <- S_A2
S4[(1:ncol(B_C2)+3*ncol(B_A2)),(1:ncol(B_C2)+3*ncol(B_A2))] <- S_C2
S5[(1:ncol(B_T2)+3*ncol(B_A2)+ncol(B_C2)),(1:ncol(B_T2)+3*ncol(B_A2)+ncol(B_C2))] <- S_T2
S6[(1:ncol(B_AC4)+3*ncol(B_A2)+ncol(B_C2)+ncol(B_T2)),(1:ncol(B_AC4)+3*ncol(B_A2)+ncol(B_C2)+ncol(B_T2))] <- S_AF
S7[(1:ncol(B_AC4)+3*ncol(B_A2)+ncol(B_C2)+ncol(B_T2)),(1:ncol(B_AC4)+3*ncol(B_A2)+ncol(B_C2)+ncol(B_T2))] <- S_CF
B_A21 <- B_A22 <- B_A2
B_A21[which(data1$qualf2bimp_1!=1),] <- 0
B_A22[which(data1$qualf2bimp_1!=2),] <- 0
X <- cbind(B_A2,B_A21,B_A22,B_C2,B_T2,B_AC4)
wt <- data1w$nw/data1w$n
Xq <- matrix(0,N,2)
for (q in 1:2) Xq[,q] <- ifelse(data1$qualf2bimp_1==q,1,0)


##ONS data setup
data <- data.frame(age=ONS_births_dat[[2]]$Group.1,coh=ONS_births_dat[[2]]$Group.2,N=ONS_expos_dat[[2]]$x,n=ONS_births_dat[[2]]$x,x=ONS_rates_dat[[2]]$x)
data <- data[which(data$N>0 & data$n>=0 & data$n <= data$N & data$age %in% c(15:44)),]
if (back) data <- data[which(data$age + data$coh <= 2013),]
ONSN <- data$N
ONSn <- data$n
Nm <- nrow(data)
data1f <- expand.grid(age=agerange,coh=cohrange,gapc=1:11,qualf=1:2)
data1f <- left_join(data1f,data,by=c("age","coh"))
ACind <- which(!is.na(data1f$N))
ACind <- ACind[ACind <= nrow(data1f)/22]
data1f <- data1f[which(!is.na(data1f$N)),]
B_A2f <- B_A2[A_ind[data1f$age-14],]
B_A21f <- B_A22f <- B_A2f
B_A21f[which(data1f$qualf!=1),] <- 0
B_A22f[which(data1f$qualf!=2),] <- 0
Xf <- cbind(B_A2f,B_A21f,B_A22f,
            B_C2all[C_indall[data1f$coh-1944],],
            B_T2[T_ind[data1f$gapc],],
            B_ACfull3[rep(ACind,22),])
Nf <- nrow(Xf)
Afind <- c(data1f$age-14)[1:Nm]
Xqf <- matrix(0,Nf,2)
for (q in 1:2) Xqf[,q] <- ifelse(data1f$qualf==q,1,0)


##QAC model setup
newdata1 <- aggregate(qualf2b_1 ~ age_1 + coh_1, FUN = function(x) c(y=length(x), q1=length(x[x==1]), q2=length(x[x==2])), subset=coh_1 <= 1982)
newdata1 <- data.frame(newdata1$age_1,newdata1$coh_1,newdata1$qualf2b_1)
colnames(newdata1) <- c("a","c","y","y1","y2")
newdata1$a <- newdata1$a - median(15:44)
newdata1$yw <- aggregate(weights_1st ~ age_1 + coh_1, FUN = sum, subset = coh_1 <= 1982)$weights_1st
newdata1$wtmult <- newdata1$yw/newdata1$y
colnames(newdata1) <- c("a","c","y",paste0(1:2),"yw","wtmult")
qmax1 <- 2

newdata12 <- expand.grid(a=agerange,c=cohrange)
newdata12$cc <- newdata12$c-median(1945:1982)
newdata12$cind72 <- ifelse(newdata12$c >= 1972, 1972, newdata12$c)
newdata12$ccind72 <- newdata12$cind72-1944
newdata12$cc72 <- ifelse(newdata12$c<=1971,0,newdata12$cc)
newdata12$aind <- newdata12$a-14
newdata12$a <- newdata12$a-median(15:44)

newdata12 <- left_join(newdata12,newdata1,by=c("a","c"))
Nac <- nrow(newdata12)
y <- newdata12[,paste0(1:qmax1)]
yind <- which(!is.na(y[,1]))
Nobs <- length(yind)
y[is.na(y)] <- 0
cc72 <- newdata12$cc72
ccind72 <- newdata12$ccind72
Nc72 <- max(ccind72)
aind <- newdata12$aind
Na <- max(aind)
wtc <- newdata12$wtmult
wtc[is.na(wtc)] <- 0


##TAQ model setup
newdata1 <- aggregate(gapc_1 ~ age_1 + qualf2b_1, FUN = function(x) c(y=length(x), t1=length(x[x==1]), t2=length(x[x==2]), t3=length(x[x==3]), t4=length(x[x==4]), t5=length(x[x==5]), t6=length(x[x==6]), t7=length(x[x==7]), t8=length(x[x==8]), t9=length(x[x==9]), t10=length(x[x==10]), t11=length(x[x==11])), subset = coh_1 <= 1982)
newdata1 <- data.frame(newdata1$age_1,newdata1$qualf2b_1,newdata1$gapc_1)
newdata1$yw <- aggregate(weights_1st ~ age_1 + qualf2b_1, FUN = sum, subset = coh_1 <= 1982)$weights_1st
newdata1$wtmult <- newdata1$yw/newdata1$y
colnames(newdata1) <- c("a","q","y",paste0(1:11),"yw","wtmult")
newdata1.1 <- newdata1[newdata1$q==1,]
newdata1.2 <- newdata1[newdata1$q==2,]
rownames(newdata1.1) <- newdata1.1$a
rownames(newdata1.2) <- newdata1.2$a

y1 <- newdata1.1[,paste0(1:11)]
y2 <- newdata1.2[,paste0(1:11)]
Nab <- nrow(y1)+1
a <- as.numeric(rownames(y1))-median(as.numeric(rownames(y1)))
a2 <- a^2
ab <- y1
for (i in 3:11) ab[,i] <- c(rep(0,i-3),1:(Na-(i-3)))
ab[,2] <- 2:(Na+1)
ab <- ab[,-1]
ab[ab==0] <- 1
wt1c <- newdata1.1[rownames(y1),"wtmult"]
wt2c <- newdata1.2[rownames(y2),"wtmult"]


##Fit integrated models
standata <- list(N=N,Nm=Nm,Nf=Nf,Nac=Nac,Na=Na,Nab=Nab,Nc72=Nc72,Nobs=Nobs,
                 betadim=betadim,succ=succ,tot=tot,ONSN=ONSN,ONSn=ONSn,
                 y=y,y1=y1,y2=y2,ccind72=ccind72,cc72=cc72,
                 aind=aind,a=a,a2=a2,ab=ab,Afind=Afind,ACind=ACind,yind=yind,
                 wtc=wtc,wt1c=wt1c,wt2c=wt2c,wt=wt,X=X,Xq=Xq,Xf=Xf,Xqf=Xqf,
                 S1=S1,S2=S2,S3=S3,S4=S4,S5=S5,S6=S6,S7=S7)

#1:1
stanout <- stan(file="stan/p1_1_1.stan",data=standata,chains=1,iter=2000)
if (!back) save(stanout,file="output/p1_1_1.RData")
if ( back) save(stanout,file="output/p1_1_1_2013.RData")

# #9:1
# stanout <- stan(file="stan/p1_9_1.stan",data=standata,chains=1,iter=2000)
# if (!back) save(stanout,file="output/p1_9_1.RData")
# if ( back) save(stanout,file="output/p1_9_1_2013.RData")
# 
# #2:1
# stanout <- stan(file="stan/p1_2_1.stan",data=standata,chains=1,iter=2000)
# if (!back) save(stanout,file="output/p1_2_1.RData")
# if ( back) save(stanout,file="output/p1_2_1_2013.RData")
# 
# #1:2
# stanout <- stan(file="stan/p1_1_2.stan",data=standata,chains=1,iter=2000)
# if (!back) save(stanout,file="output/p1_1_2.RData")
# if ( back) save(stanout,file="output/p1_1_2_2013.RData")
# 
# #1:9
# stanout <- stan(file="stan/p1_1_9.stan",data=standata,chains=1,iter=2000)
# if (!back) save(stanout,file="output/p1_1_9.RData")
# if ( back) save(stanout,file="output/p1_1_9_2013.RData")
