;+
; NAME:
;    regiondna_conf_regcoef.pro
;
; PURPOSE:
;    This function determines a change factor for the confidence metric used 
;    in the IPCC AR6 WGII detection and attribution assessments, based on the 
;    coefficients from the multiple linear regression.
;
; CATEGORY:
;    REGIONAL D-AND-A CONFIDENCE
;
; CALLING SEQUENCE:
;    result = regiondna_conf_regcoef( option=option )
;
; INPUT:
;    OPTION:  A required scalar string specifying the type of test to perform.  
;        Supported values are:
;        * 'consistent with 1'
;        * 'consistent with gt 0'
;    BETA_EST, FRAC_DIST_BETA_POSITIVE, MAX_DEGRADE, P_LIMIT
;
; KEYWORD PARAMETERS:
;    BETA_EST:  A float array of size 3,N_SCEN,N_SOURCE containing the 
;        regression coefficients output from the multiple linear regression 
;        calculation.  The first dimension contains the best estimate, the 
;        lower P_LIMIT/2 confidence range value, and the upper 1-P_LIMIT/2 
;        confidence range value, respectively.  N_SCEN is the number of output 
;        scenarios and N_SOURCE the number of observation/climate-model 
;        combinations.  Required if OPTION='consistent with 1'.
;    FRAC_DIST_BETA_POSITIVE:  A float array of size N_SCEN,N_SOURCE providing 
;        the probability of the regression coefficient being greater than zero 
;        for of each of the N_SCEN output scenarios and N_SOURCE data source 
;        combinations.  Required of OPTION='consistent with gt 0'.  If 
;        OPTION='consistent with 1' and METHOD='distributions', then this 
;        should provide the probabilities of the regression coefficients begin 
;        greater than one.
;    MAX_DEGRADE:  A scalar float specifying the maximum degradation factor 
;        that can arise from this calculation.  It must in the (0,1] range.  
;        Required if OPTION='consistent with 1'.
;    METHOD:  An optional scalar string specifying the method for evaluation. 
;        Required if OPTION='consistent with 1'.  Possible values are:
;          * 'binomial':  The Stone and Hansen (2016) method, comparing the 
;            frequency that the P_LIMIT significance level is exceeded in the 
;            BETA_EST confidence ranges against a binomial distribution.
;          * 'distribution:  Integrate across the fractions of the probability 
;            distributions on the regression coefficients that are greater than 
;            1, as specified in the FRAC_DIST_BETA_POSITIVE input.  Weightings 
;            can be included via the SOURCE_WEIGHT keyword input.  Note this 
;            interpretes the likelihood distributions as probability 
;            distributions.
;          * 'empirical':  Based on the p-value of 1.0 in the empirical 
;            distribution of regression coefficients.  Weightings can be 
;            included via the SOURCE_WEIGHT keyword input.
;          * 'k-s':  Compare the distribution of estimated p-values of 
;            BETA_EST=1 against a uniform distribution using a 
;            Kolmogorov-Smirnov test.  Weightings can be included via the 
;            SOURCE_WEIGHT keyword input.
;          * 'weighted binomial':  Similar to the Stone and Hansen (2016) 
;            method, but including weightings on each of the samples as input 
;            via the SOURCE_WEIGHT keyword input.
;    P_LIMIT:  An optional scalar float specifying the p-value for statistical 
;        significance tests.  The default is 0.1.
;    SOURCE_WEIGHT:  An optional float vector of size N_SOURCE specifying 
;        weights for the sources when calculating bulk measures.  The default 
;        is 1.0/N_SOURCE for all sources.  The weighting is normalised, so it 
;        is okay if the sum of the weights is not equal to one.
;
; OUTPUT:
;    RESULT:  A float scalar in the range [0,1] specifying the change factor 
;        by which to multiply the confidence metric in ipccar6_conf.pro, 
;        according to evaluation of the regression coefficients.
;
; USES:
;    add_dim.pro
;    threshold_quantile.pro
;    ks_onesample.pro
;
; PROCEDURE:
;    See Stone and Hansen (2016) for more details.
;
; EXAMPLE:
;    See regiondna_conf.pro.
;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2015-03-23 (As 
;        part of hanseng_conf_assess_up.pro)
;    Modified:  DAS, 2020-10-29 (Adapted from hanseng_conf_assess_up.pro)
;    Modified:  DAS, 2020-11-19 (Added SOURCE_WEIGHT, METHOD keyword inputs, 
;        'k-s' method)
;    Modified:  DAS, 2020-12-11 (Added capability to handle NaN inputs for k-s 
;        option)
;    Modified:  DAS, 2021-02-11 (Added the 'distribution', 'empirical', and 
;        'weighted binomial' options to the METHOD keyword input)
;-

;***********************************************************************

FUNCTION REGIONDNA_CONF_REGCOEF, $
    OPTION, $
    BETA_EST=beta_est, $
    FRAC_DIST_BETA_POSITIVE=frac_dist_beta_positive, $
    MAX_DEGRADE=max_degrade, $
    METHOD=method, $
    P_LIMIT=p_limit, $
    SOURCE_WEIGHT=source_weight

;***********************************************************************
; Constants and options

; Confirm an option is input
if n_elements( option ) ne 1 then stop

; Check valid input
if keyword_set( beta_est ) then begin
  if n_elements( beta_est[*,0,0] ) ne 3 then stop
  ; Count the number of scenarios and coefficients
  n_scen = n_elements( beta_est[0,*,0] )
  n_source = n_elements( beta_est[0,0,*] )
endif else if n_elements( frac_dist_beta_positive ) gt 0 then begin
  ; Count the number of scenarios and coefficients
  n_scen = n_elements( frac_dist_beta_positive[*,0] )
  n_source = n_elements( frac_dist_beta_positive[0,*] )
endif else begin
  stop
endelse

; Check maximum degradation factor
if n_elements( max_degrade ) gt 1 then stop
if n_elements( max_degrade ) eq 1 then begin
  if max_degrade le 0 then stop
  if max_degrade gt 1 then stop
endif

; The default p-value for assigning statistical significance
if not( keyword_set( p_limit ) ) then p_limit = 0.1

; The default number of Monte Carlo samples for determining the statistical 
; significance of the weighted binomial outcome
n_mc_binomial = 10000

; Determine the exponent on a cos such that the 0.5-intercepts are at 
; [p_limit/1,1-p_limit/2].  This is used to convert p-values of tests into 
; confidence change factors.
if option eq 'consistent with 1' then begin
  factor_exp = alog( 0.5 ) / alog( 0.5 * ( 1. - cos( !pi * p_limit ) ) )
endif

; The default weighting for sources
if not( keyword_set( source_weight ) ) then begin
  source_weight_use = ( 1. + fltarr( n_source ) ) / n_source
endif else begin
  if n_elements( source_weight ) ne n_source then stop
  source_weight_use = source_weight / total( source_weight )
endelse
source_weight_use = add_dim( source_weight_use, 0, n_scen ) / n_scen

;***********************************************************************
; Calculate degradation factor

; Determine if there are any calculated regression coefficients
if keyword_set( beta_est ) then begin
  id_source = where( finite( beta_est[0,0,*] ) eq 1, n_id_source )
endif else begin
  id_source = where( finite( frac_dist_beta_positive[0,*] ) eq 1, n_id_source )
endelse
;if n_id_source ne n_source then stop ; I am not sure if this should be possible

; If there are no calculated coefficients
if n_id_source eq 0 then begin
  ; Assign confidence to maximum degradation
  result = 1. - max_degrade
; If there are calculated coefficients
endif else begin
  ; If we want to test consistency with 1
  if option eq 'consistent with 1' then begin
    ; If we are using the Stone and Hansen (2016) approach
    if method eq 'binomial' then begin
      ; Weighting is not supported with this method
      if keyword_set( source_weight ) then stop
      ; Identify cases of inconsistency with 1
      id_beta = where( ( beta_est[1,*,id_source] ge 1. ) $
          or ( beta_est[2,*,id_source] le 1. ), n_id_beta )
      ; Calculate degradation factor
      result = binomial( n_id_beta, n_id_source, p_limit )
      result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
      result = ( 1. - max_degrade ) + max_degrade * result
    ; If we are using a weighted version of the Stone and Hansen (2016) approach
    end else if method eq 'weighted binomial' then begin
      ; Identify cases of inconsistency with 1
      id_beta = where( ( beta_est[1,*,id_source] ge 1. ) $
          or ( beta_est[2,*,id_source] le 1. ), n_id_beta )
      if n_id_beta gt 0 then n_id_beta = total( source_weight_use[id_beta] )
      ; Calculate the p-value of this outcome 
      n_id_beta_mc = fltarr( n_mc_binomial )
      for i_mc = 0, n_mc_binomial - 1 do begin
        temp = randomu( seed, n_id_source )
        id_beta_temp = where( temp le p_limit, n_id_beta_temp )
        if n_id_beta_temp eq 0 then begin
          n_id_beta_mc[i_mc] = 0.
        endif else begin
          n_id_beta_mc[i_mc] = total( source_weight_use[id_beta_temp] )
        endelse
      endfor
      id = where( n_id_beta_mc ge n_id_beta, n_id )
      result = float( n_id ) / float( n_mc_binomial )
      ; Calculate degradation factor
      if result gt 0.5 then result = 1. - result
      result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
      result = ( 1. - max_degrade ) + max_degrade * result
    ; If we are using an approach integrating across the probability 
    ; distributions
    end else if method eq 'distribution' then begin
      ; Integrate across coefficients
      if keyword_set( source_weight ) then begin
        result = total( source_weight_use * frac_dist_beta_positive, nan=1 ) $
            / total( source_weight_use )
      endif else begin
        result = mean( frac_dist_beta_positive, nan=1 )
      endelse
      ; Calculate degradation factor
      if result gt 0.5 then result = 1. - result
      result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
      result = ( 1. - max_degrade ) + max_degrade * result
    ; If we are using a empirical approach
    end else if method eq 'empirical' then begin
      ; Identify cases of inconsistency with 1
      temp_data = reform( beta_est[0,*,*], n_scen * n_source )
      ; Calculate the p-value of 1.0 within this distribution
      result = threshold_quantile( temp_data, 1.0, weight=source_weight_use )
      ; Calculate degradation factor
      if result gt 0.5 then result = 1. - result
      result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
      result = ( 1. - max_degrade ) + max_degrade * result
    ; If we are using the K-S test approach
    endif else if method eq 'k-s' then begin
      ; Determine the p-values of BETA_EST=1.
      beta_p_value_one = fltarr( n_scen, n_id_source )
      for i_source = 0, n_id_source - 1 do begin
        for i_scen = 0, n_scen - 1 do begin
          ; Suppose that the coefficients are Gaussian distributed with 
          ; 1-P_LIMIT confidence ranges BETA_EST[1:2,*,*].
          ; (Given everything else here, this approximation seems justified.)
          temp_mean = beta_est[0,i_scen,id_source[i_source]]
          temp_std = beta_est[1:2,i_scen,id_source[i_source]] - temp_mean
          temp_std = ( temp_std[1] - temp_std[0] ) $
              / ( gauss_cvf( p_limit / 2. ) - gauss_cvf( 1. - p_limit / 2. ) )
          beta_p_value_one[i_scen,i_source] = gauss_pdf( $
              ( 1. - temp_mean ) / temp_std )
        endfor
      endfor
      ; If we have a single scenario-source sample then just take its p-value
      if n_scen * n_id_source eq 1 then begin
        result = beta_p_value_one[0]
      ; Otherwise determine the p-value of the coefficients fits to a uniform 
      ; distribution
      endif else begin
        beta_p_value_one = reform( beta_p_value_one, n_scen * n_id_source )
        temp_weight = reform( source_weight_use[*,id_source], $
            n_scen * n_id_source )
        result = ks_onesample( beta_p_value_one, method='monte carlo', $
            ref_name='uniform', weights=temp_weight )
      endelse
      ; Convert p-value to confidence factor
      ; (2-tailed)
      result = ( 0.5 * ( 1. - cos( 2. * !pi * result ) ) ) ^ factor_exp
      result = ( 1. - max_degrade ) $
          + max_degrade * result * total( source_weight_use[*,id_source] ) $
          / total( source_weight_use )
    ; If the method is not implemented
    endif else begin
      stop
    endelse
  ; If we want lack of consistency with less than zero
  endif else if option eq 'consistent with gt 0' then begin
    ; Take the average fraction of the distribution consistent with being 
    ; greater than zero.
    ; (Note this is interpreting the distributions output from gendetec.pro as 
    ; probability density functions.)
    result = total( source_weight_use * frac_dist_beta_positive, nan=1 )
  ; Not yet supported
  endif else begin
    stop
  endelse
endelse

;***********************************************************************
; The end

return, result
END
