##########################################################################################
#
# PROGRAM: This function is usefull to fit the variable dispersion beta regressions 
#          with parametric link functions
#
# AUTHORS: Diego Ramos Canterle and Fábio Mariano Bayer
#
# E-MAIL: bayer@ufsm.br
#
# DATE: 11/2015
#
# REFERENCE:
#
#   Canterle, D.R. and Bayer, F.M. (2015) Variable dispersion beta regressions with parametric link functions
#
##########################################################################################

source("aux.R")

"br.fit" <- function(x,z,y,diag=1){
  
  x <- as.matrix(x)
  x1 <- cbind(rep(1,length(y)),x)
  z <- as.matrix(z)
  z1 <- cbind(rep(1,length(y)),z)
  y <- as.vector(y)
  
  r <- ncol(x1)
  s <- ncol(z1)
  n <- length(y)
  
  eta1<-c()
  eta2<-c()
    
  loglik <- function(theta){
    beta <- as.vector(theta[1:r])
    gamma <- as.vector(theta[(r+1):(r+s)])
    lambda1 <- as.numeric(theta[r+s+1])
    lambda2 <- as.numeric(theta[r+s+2])
    
    eta1 <- as.vector(x1%*%beta)
    eta2 <- as.vector(z1%*%gamma)
    
    mu <- as.vector(linkinv(eta1,lambda1))
    sigma <- as.vector(linkinv(eta2,lambda2))
    phi <- as.vector(((1-(sigma^2))/(sigma^2)))
    
    ll <- sum(lgamma(phi)-lgamma(mu*phi)-lgamma((1-mu)*phi)+(mu*phi-1)*log(y)+((1-mu)*phi-1)*log(1-y))
    ll
  }
  
  escore <- function(theta){
    beta <- as.vector(theta[1:r])
    gamma <- as.vector(theta[(r+1):(r+s)])
    lambda1 <- as.numeric(theta[r+s+1])
    lambda2 <- as.numeric(theta[r+s+2])
    
    eta1 <- as.vector(x1%*%beta)
    eta2 <- as.vector(z1%*%gamma)
    
    mu <- as.vector(linkinv(eta1,lambda1))
    sigma <- as.vector(linkinv(eta2,lambda2))
    phi <- as.vector(((1-(sigma^2))/(sigma^2)))
    ystar = as.vector(log( y / (1.0 - y) ))
    mustar = as.vector(digamma(mu*phi)-digamma((1.0-mu)*phi))
    
    PHI = diag( phi )
    T = diag(as.vector(mu.eta(eta1,lambda1)))
    H = diag(as.vector(mu.eta(eta2,lambda2)))
    
    ydag = as.vector(log( 1.0 - y ))
    mudag = as.vector(digamma((1.0 - mu) * phi) - digamma(phi))
    M = diag(mu)
    a = as.vector(((-2.0 / sigma^3)*(M %*% (ystar-mustar)+(ydag-mudag))))
    rho = as.vector((1/lambda1)*((1/(exp(-eta1)+lambda1))-(log(1+lambda1*exp(eta1))/lambda1))*((1+lambda1*exp(eta1))^(-1/lambda1)))
    varrho = as.vector((1/lambda2)*((1/(exp(-eta2)+lambda2))-(log(1+lambda2*exp(eta2))/lambda2))*((1+lambda2*exp(eta2))^(-1/lambda2)))
    
    Ubeta  = t(x1) %*% T %*% PHI %*% (ystar-mustar)
    Ugama  = t(z1) %*% H %*% a
    Ul1 = sum(phi*(ystar-mustar)*rho)
    Ul2 = sum(a*varrho)
    
    c(Ubeta,Ugama,Ul1,Ul2)
  }
  

  ystar = log( y / (1.0 - y) )
  betaols<- (lm.fit(x1, ystar))$coefficients
  etaols = x1 %*% betaols
  muols = exp(etaols) / (1.0 + exp(etaols))
  varols = as.numeric(t(ystar - etaols) %*% (ystar-etaols)) / ((n-r)*((1 /(muols*(1-muols)))^(2)))
  phiols = ( muols * (1.0-muols) / varols ) - 1.0
  sigmaini = sqrt( 1.0 / (1.0 + phiols))
  gamaols = (lm.fit(z1, log(sigmaini/(1.0-sigmaini))))$coefficients
  lambda1_ini <- 1
  lambda2_ini <- 1
  
  ini <- c(betaols,gamaols,lambda1_ini,lambda2_ini)
  
  eta1 <- as.vector(x1%*%betaols)
  eta2 <- as.vector(z1%*%gamaols)
  
  opt <- optim(ini, loglik, escore, 
                method = "BFGS", control = list(fnscale = -1, maxit = 500, reltol = 1e-9))

  if (opt$conv != 0)
    warning("FUNCTION DID NOT CONVERGE!")
  

  k <- c()
  coef <- opt$par
  k$coef <- coef
  k$conv <- opt$convergence
  k$loglik <- opt$value
  k$counts <- as.numeric(opt$counts[1])
  k$betas <- opt$par[1:r]
  k$gammas <- opt$par[(r+1):(r+s)]
  k$lambda1 <- opt$par[r+s+1]
  k$lambda2 <- opt$par[r+s+2]
  k$y <- y
  k$mu.x <- x
  k$sigma.x <- z
  
  
  eta1_hat <- as.vector(x1%*%(k$betas))
  eta2_hat <- as.vector(z1%*%(k$gammas))
  k$mu.lp <- eta1_hat
  k$sigma.lp <- eta2_hat

  mu_hat <- as.vector(linkinv(eta1_hat,(k$lambda1)))
  sigma_hat <- as.vector(linkinv(eta2_hat,(k$lambda2)))
  k$mu.fv <- mu_hat
  k$sigma.fv <- sigma_hat
  
  phi_hat <- ((1-sigma_hat^2)/(sigma_hat^2))
  PHI <- diag(phi_hat)
  W <- diag(phi_hat*(trigamma(mu_hat*phi_hat)+trigamma((1-mu_hat)*phi_hat))*((diflink(mu_hat,(k$lambda1)))^(-2)))
  C <- diag(phi_hat*(2/(sigma_hat^3))*((1-mu_hat)*trigamma((1-mu_hat)*phi_hat)-mu_hat*trigamma(mu_hat*phi_hat)))
  T <- diag(as.vector(mu.eta(eta1_hat,(k$lambda1))))
  H <- diag(as.vector(mu.eta(eta2_hat,(k$lambda2))))
  V <- diag((phi_hat^2)*(trigamma(mu_hat*phi_hat)+trigamma((1-mu_hat)*phi_hat)))
  rho <- as.vector((1/(k$lambda1))*((1/(exp(-eta1_hat)+(k$lambda1)))-(log(1+(k$lambda1)*exp(eta1_hat))/(k$lambda1)))*
                  ((1+(k$lambda1)*exp(eta1_hat))^(-1/(k$lambda1))))
  varrho <- as.vector((1/(k$lambda2))*((1/(exp(-eta2_hat)+(k$lambda2)))-(log(1+(k$lambda2)*exp(eta2_hat))/(k$lambda2)))*
                     ((1+(k$lambda2)*exp(eta2_hat))^(-1/(k$lambda2))))
  D <- diag((4/(sigma_hat^6))*(-trigamma(phi_hat)+(mu_hat^2)*trigamma(mu_hat*phi_hat)+((1-mu_hat)^2)*trigamma((1-mu_hat)*phi_hat)))
  
  Kbb <- t(x1)%*%PHI%*%W%*%x1
  Kbg <- t(x1)%*%C%*%T%*%H%*%z1
  Kgb <- t(Kbg)
  Kbl1 <- t(x1)%*%V%*%T%*%rho
  Kl1b <- t(Kbl1)
  Kbl2 <- t(x1)%*%C%*%T%*%varrho
  Kl2b <- t(Kbl2)
  Kgg <- t(z1)%*%D%*%H%*%t(H)%*%z1
  Kgl1 <- t(z1)%*%C%*%H%*%rho
  Kl1g <- t(Kgl1)
  Kgl2 <- t(z1)%*%D%*%H%*%varrho
  Kl2g <- t(Kgl2)
  Kl1l1 <- t(rho)%*%V%*%rho
  Kl1l2 <- t(rho)%*%C%*%varrho
  Kl2l1 <- t(Kl1l2)
  Kl2l2 <- t(varrho)%*%D%*%varrho
  
  K <- rbind(
    cbind(Kbb,Kbg,Kbl1,Kbl2),
    cbind(Kgb,Kgg,Kgl1,Kgl2),
    cbind(Kl1b,Kl1g,Kl1l1,Kl1l2),
    cbind(Kl2b,Kl2g,Kl2l1,Kl2l2)
    )
  
  k$K <- K
  
  vcov <- chol2inv(chol(K))
  k$vcov <- vcov
  
  ystar <- as.vector(log( y / (1.0 - y) ))
  mustar <- as.vector(digamma(mu_hat*phi_hat)-digamma((1.0-mu_hat)*phi_hat))
  var_y <- as.vector((mu_hat*(1-mu_hat))/(1+phi_hat))
  var_ystar <- as.vector(trigamma(mu_hat*phi_hat)+trigamma((1-mu_hat)*phi_hat))
  H_resid <- as.matrix(sqrt(W%*%PHI)%*%x1%*%solve(t(x1)%*%PHI%*%W%*%x1)%*%t(x1)%*%sqrt(PHI%*%W))
  h <- as.vector(diag(H_resid))

  k$resid <- (y-mu_hat)/sqrt(var_y)

  resid.type2 <- (ystar-mustar)/sqrt(var_ystar*(1-h))
  k$resid.type2 <- resid.type2

  k$Cook <- (h/(1-h))*(resid.type2^2)
  
  stderror <- sqrt(diag(vcov))
  k$stderror <- stderror
  
  R2 <- 1-exp((-2/n)*(opt$value-beta.nul(y)$loglik))
  k$R2 <- R2
  
  k$zstat <- abs(coef[1:(r+s)]/stderror[1:(r+s)])
  k$pvalues <- 2*(1 - pnorm(k$zstat) )
  
  k$loglik <- opt$value
  k$counts <- as.numeric(opt$counts[1])
  k$aic <- -2*k$loglik+2*(r+s+2)
  k$bic <- -2*k$loglik+log(n)*(r+s+2)
  
  model_presentation <- cbind(round(coef,4),round(k$stderror,4),c(round(k$zstat,4),NA,NA),c(round(k$pvalues,4),NA,NA))
  rownames<-rownames(model_presentation)
  rownames[1]<-"Intercept"
  rownames[r+1]<-"Intercept"
  rownames[r+s+1]<-"lambda1"
  rownames[r+s+2]<-"lambda2"
  colnames(model_presentation)<-c("Estimate","Std. Error","z value","Pr(>|z|)")
  rownames(model_presentation)<-rownames
  
  mp1 <- model_presentation[(1:r),(1:4)]
  mp2 <- model_presentation[((r+1):(r+s)),(1:4)]
  mp3 <- model_presentation[(r+s+1):(r+s+2),(1:2)]
  
  k$model$mean <- mp1
  k$model$dispersion <- mp2
  k$model$link <- mp3
  
  if(diag==1)
  {
    print("Mean model",quote=F)
    print(mp1)
    print("Dispersion model",quote=F)
    print(mp2)
    print("Link functions",quote=F)
    print(mp3)
    print(" ",quote=F)
    print(c("Log-likelihood:",round(k$loglik,4)),quote=F)
    print(c("Number of iterations in BFGS optim:",k$counts),quote=F)
    print(c("AIC:",round(k$aic,4)),quote=F)
    print(c("Pseudo R-square:",round(k$R2,4)),quote=F)
    
    print("standardized weighted residual 2:",quote=F)
    print(summary(resid.type2))
  }
  return(k)
}