;+
; NAME:
;    spline_of_means.pro
;
; PURPOSE:
;    This function adjusts data obtained from spline interpolation of averages 
;    to higher frequency such that the original averaged values are re-obtained 
;    upon averaging of the spline output.
;
; CATEGORY:
;    Time Series Analysis
;
; CALLING SEQUENCE:
;    out_dep = spline_of_means( in_indep, in_dep, out_indep )
;
; INPUTS:
;    IN_DEP:  A required floating point vector of size N_IN containing the 
;        data to be interpolated.  The data are averages across the independent 
;        variable at the locations recorded in IN_INDEP.
;    IN_INDEP:  A required floating point vector of size N_IN containing the 
;        locations of the mid-points of the averaging intervals used in the 
;        calculation of IN_DEP.  Values must be in units of distance from the 
;        beginning of the first interval, so e.g. the first value must be half 
;        the length of the first interval.  For instance, if we are starting 
;        with a January temporal mean, the first value must be 31/2=15.5.  
;        Values must be in ascending order.
;    OUT_INDEP:  A required floating point vector of size N_OUT containing the 
;        locations of the mid-points of the spline interpolation output.  
;        Values must be in ascending order.  
;
; KEYWORD PARAMETERS:
;    DOUBLE:  If set then the calculation is performed in double precision.  
;        The default is the format of the data.
;    ADJ_FACTOR:  An optional scalar float specifying the factor by which to 
;        adjust the interpolated output at each iterate step.  This factor 
;        is multiplied by the difference between re-interpolated and input 
;        dependent data to determine the adjustment.  The default is 1.0.
;    TOL_THRESH:  An optional scalar float specifying the tolerance threshold 
;        at which convergence of the iterative solution is considered close 
;        enough.  The default is 0.001, which with TOL_TYPE='fraction' 
;        corresponds to 0.1% of the standard deviation of IN_DEP.
;    TOL_TYPE:  An optional scalar string specifying the type of tolerance 
;        threshold input in TOL_THRESH.  If 'absolute', then TOL_THRESH is 
;        compared directly against the standard deviation of the discrepancies 
;        between the re-averaged interpolated data and the input IN_DEP data.  
;        If 'fraction', then TOL_THRESH is first multiplied by the standard 
;        deviation of IN_DEP, i.e. it is the ratio of the standard deviation 
;        of the discrepancies to the standard deviation of IN_DEP.  The 
;        default is 'fraction'.
;
; OUTPUTS:
;    OUT_DEP:  Returns a floating point vector of length N_OUT containing the 
;        adjusted spline interpolation data at the locations specified in 
;        OUT_INDEP.
;
; USES:
;    -
;
; PROCEDURE:
;    This function uses the IDL spline function and iteratively adjusts the 
;    values passed to it until the specified tolerance is achieved.  It 
;    assumes that locations in OUT_INDEP fit entirely within locations within 
;    IN_INDEP, such as days in a month but not weeks in a month, and that they 
;    are spread evenly within the IN_INDEP locations, such as all 31 days in 
;    January but not a random 19 days in January.
;
; EXAMPLE:
;    ; A double sinusoid over a period of a 365-day year
;    time_out = 0.5 + findgen( 365. )
;    data_sin = sin( time_out / 365. * 2. * !pi )
;    index_month = month_day( indgen( 12 ) )
;    time_in = reform( index_month[0,*] + index_month[1,*] + 1. ) / 2.
;    data_in = fltarr( 12 )
;    for i = 0, 11 do data_in[i] = mean( data_sin[index_month[0,i]:index_month[1,i]] )
;    data_out = spline_of_means( time_in, data_in, time_out )
;    plot, time_out, data_sin, yrange=[-1.2,1.2], ystyle=1
;    tek_color
;    oplot, time_in, data_in, color=2
;    oplot, time_out, data_out, color=4

;
; MODIFICATION HISTORY:
;    Written by:  Daithi Stone (dastone@runbox.com), 2022-04-21
;    Modified:  DAS, 2022-08-05 (Corrected error in matching days with months;  
;        Corrected error in calculating monthly averages from days)
;-

;***********************************************************************

FUNCTION SPLINE_OF_MEANS, $
    IN_INDEP, IN_DEP, OUT_INDEP, $
    ADJ_FACTOR=adj_factor, $
    EXTEND_CYCLE=extend_cycle, $
    TOL_THRESH=tol_thresh, TOL_TYPE=tol_type, $
    DOUBLE=double_opt

; Mean-preserving spline interpolation.
;
; Spline-interpolates the interval averages IN_DEP (located at the interval 
; mid-points IN_INDEP) onto OUT_INDEP, then iteratively nudges the values 
; handed to the spline until averaging the output back over each input 
; interval reproduces IN_DEP to within the requested tolerance.
;
; Inputs:
;    IN_INDEP:  Mid-points of the averaging intervals, measured from the start 
;        of the first interval, in ascending order.
;    IN_DEP:  Interval averages, same number of elements as IN_INDEP.
;    OUT_INDEP:  Output locations, in ascending order.
; Keywords:
;    ADJ_FACTOR:  Multiplier on the discrepancy applied at each iteration.  
;        Default 1.0.
;    EXTEND_CYCLE:  Accepted for interface compatibility but not used by this 
;        implementation.
;    TOL_THRESH:  Convergence threshold.  Default 0.001.
;    TOL_TYPE:  'absolute' or 'fraction' (default).  'relative' is accepted as 
;        a synonym for 'fraction'.  Matching is case-insensitive.
;    DOUBLE:  If set, the spline is evaluated in double precision.
; Returns:
;    The adjusted spline values at OUT_INDEP.
;
; None of the keyword variables passed by the caller are modified;  defaults 
; are resolved into local copies.

;***********************************************************************
; Constants

; Determine the dimensions of the input and output
n_in = n_elements( in_indep )
n_out = n_elements( out_indep )
if n_in eq 0 then message, 'IN_INDEP must be defined.'
if n_elements( in_dep ) ne n_in then begin
  message, 'IN_DEP must have the same number of elements as IN_INDEP.'
endif
if n_out eq 0 then message, 'OUT_INDEP must be defined.'

; The tolerance type (local copy so the caller's variable is left untouched)
if keyword_set( tol_type ) then begin
  tol_type_use = strlowcase( tol_type[0] )
endif else begin
  tol_type_use = 'fraction'
endelse
; The header documentation historically called the fractional type 'relative'
if tol_type_use eq 'relative' then tol_type_use = 'fraction'
if ( tol_type_use ne 'absolute' ) and ( tol_type_use ne 'fraction' ) then begin
  message, "TOL_TYPE must be 'absolute' or 'fraction' (or 'relative')."
endif

; The tolerance threshold (default 0.1% of the IN_DEP standard deviation for 
; the fractional type)
if keyword_set( tol_thresh ) then begin
  tol_thresh_use = tol_thresh
endif else begin
  tol_thresh_use = 0.001
endelse
; Convert a fractional tolerance to the units of IN_DEP
if tol_type_use eq 'fraction' then begin
  tol_thresh_use = stddev( in_dep ) * tol_thresh_use
endif

; The adjustment factor for use in each iterative step
if keyword_set( adj_factor ) then begin
  adj_factor_use = adj_factor
endif else begin
  adj_factor_use = 1.
endelse

; Option for double precision arithmetic
double_use = keyword_set( double_opt )

;***********************************************************************
; Do adjustments to produce mean-preserving interpolation

; Determine the lengths of the input intervals.  The first interval starts at 
; zero so its length is twice its mid-point;  each later length follows from 
; the distance between mid-points minus half of the previous length.  The 
; floating point factor avoids truncation if IN_INDEP is of integer type.
in_indep_len = 2. * in_indep
for i_in = 1L, n_in - 1 do begin
  in_indep_len[i_in] = 2. $
      * ( in_indep[i_in] - in_indep[i_in-1] - in_indep_len[i_in-1] / 2. )
endfor

; Determine the mapping of out_indep locations to in_indep intervals, stored 
; as the first and last OUT_INDEP index within each interval (-1 if none).  
; Long integers are used so that indices above 32767 are not truncated.
id_indep_out_to_in = lonarr( 2, n_in ) - 1L
for i_in = 0L, n_in - 1 do begin
  bound_lower = in_indep[i_in] - in_indep_len[i_in] / 2.
  bound_upper = in_indep[i_in] + in_indep_len[i_in] / 2.
  id = where( ( out_indep ge bound_lower ) and ( out_indep lt bound_upper ), $
      n_id )
  if n_id gt 0 then id_indep_out_to_in[*,i_in] = id[[0,n_id-1]]
endfor

; Calculate the spline fit
out_dep = spline( in_indep, in_dep, out_indep, double=double_use )
; Copy IN_DEP to a floating point version that will be adjusted
in_dep_adj = 1. * in_dep

; Initialise the interval averages of the estimate with NaN, matching the 
; precision of the spline output.  Intervals containing no output location 
; are never overwritten and so stay NaN throughout.
if size( out_dep, /type ) eq 5 then begin
  out_dep_to_in = replicate( !values.d_nan, n_in )
endif else begin
  out_dep_to_in = replicate( !values.f_nan, n_in )
endelse

; Iterate until the fit passes the tolerance threshold
check_flag = 0
while check_flag eq 0 do begin
  ; Calculate the IN_INDEP averages of the OUT_DEP estimate
  for i_in = 0L, n_in - 1 do begin
    if id_indep_out_to_in[0,i_in] ne -1 then begin
      out_dep_to_in[i_in] = mean( $
          out_dep[id_indep_out_to_in[0,i_in]:id_indep_out_to_in[1,i_in]] )
    endif
  endfor
  ; Calculate the standard deviation of the difference from the input IN_DEP 
  ; values
  temp_diff = stddev( out_dep_to_in - in_dep, nan=1 )
  if finite( temp_diff ) eq 0 then begin
    ; A non-finite discrepancy can never satisfy the tolerance test, so 
    ; iterating further would loop forever.  Return the current estimate.
    message, /continue, 'Discrepancy between re-averaged output and IN_DEP ' $
        + 'is not finite;  returning the current spline estimate.'
    check_flag = 1
  endif else if temp_diff le tol_thresh_use then begin
    ; The tolerance threshold has been passed
    check_flag = 1
  endif else begin
    ; Adjust the values given to the spline to compensate for the error, but 
    ; only for intervals for which an average could be calculated
    id = where( finite( out_dep_to_in ) eq 1, n_id )
    if n_id gt 0 then begin
      in_dep_adj[id] = in_dep_adj[id] $
          - adj_factor_use * ( out_dep_to_in[id] - in_dep[id] )
    endif
    ; Calculate adjusted spline fit
    out_dep = spline( in_indep, in_dep_adj, out_indep, double=double_use )
  endelse
endwhile

;***********************************************************************
; The end

return, out_dep
END
