;+
; NAME:
;    days_to_months
;
; PURPOSE:
;    This function converts daily data to monthly data.
;
; CATEGORY:
;    Calendar
;
; CALLING SEQUENCE:
;    Result = days_to_months( var_data, time_data=time_data, $
;        time_units=time_units, operation=operation )
;
; INPUTS:
;    VAR_DATA:  A required numerical two-dimensional input array of size 
;        N_DAY,N_POINT, where N_POINT is some arbitrary number of points 
;        with time series of length N_DAY.  This contains the input daily data.
;    OPERATION, OUT_TIME_UNITS, TIME_CALENDAR, TIME_DATA, TIME_UNITS
;
; OUTPUTS:
;    RESULT:  Returns an output array of size N_MONTH,N_POINT, containing the 
;        N_MONTH-long monthly data for the N_POINTS calculated from VAR_DATA.
;    TIME_DATA, TIME_UNITS
;
; KEYWORD PARAMETERS:
;    NAN:  The NAN option in a number of IDL functions, dictating that NaN 
;        values should be ignored.
;    OPERATION:  The mathematic operation to perform in converting daily data 
;        to monthly data.  Required.
;        * 'average' or 'mean':  Take the arithmetic average.
;        * 'median':  Take the median.
;        * 'mode':  Take the mode.
;        * 'none':  Retain daily data, just performing extraction to 
;            month/season.
;        * 'stddev':  Take the standard deviation.
;        * 'total':  Take the total.
;    OUT_TIME_UNITS:  An optional scalar string specifying the units of the 
;        time vector output in TIME_DATA.  The default is 'yyyymm'.
;    SEASON_START:  An optional scalar integer identifying the index of the 
;        first month in a season to which to restrict output.  SEASON_LEN=0 is 
;        for a season starting in January, etc.
;    SEASON_LEN:  An optional SCALAR integer defining the number of months in a 
;        season to which to restrict output.
;    TIME_CALENDAR:  The optional CALENDAR keyword option for the 
;        convert_time_format.pro function, if conversion to the default 
;        'yyyymmdd' format is required.  Default is 'gregorian'.
;    TIME_DATA:  A required floating point vector of length N_DAY containing 
;        daily time values.  Also outputs the monthly time vector.
;    TIME_UNITS:  A required scalar string specifying the units of the time 
;        vector.  Also output the units of the output monthly time vector.
;
; USES:
;    convert_time_format.pro
;    isin.pro
;    mode.pro
;    str.pro
;    var_type.pro
;
; PROCEDURE:
;    This function identifies which days belong to each month, and then 
;    calculates the summary statistic specified in OPERATION across those days.
;
; EXAMPLE:
;    ; To calculate the mean of days since 2000-01-01 for February 2000 and 
;    ; 2001.
;    days = findgen ( 365 + 365 + 1 ) + 0.5
;    time = days
;    month_mean = days_to_months( days, time_data=time, operation='mean', $
;        time_units='days since 2000-01-01T00:00:00', season_len=3, $
;        season_start=1 )

;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2019-08-01.
;    Modified:  DAS, 2020-05-19 (Added 'none' option for OPERATION;  Completed 
;        documentation)
;    Modified:  DAS, 2022-02-22 (Corrected consideration of leap days when the 
;        time series does not start with a leap year)
;-

;***********************************************************************

FUNCTION DAYS_TO_MONTHS, $
    VAR_DATA, $
    OPERATION=operation, $
    OUT_TIME_UNITS=out_time_units, $
    SEASON_LEN=season_len, SEASON_START=season_start, $
    TIME_DATA=time_data, TIME_UNITS=time_units, TIME_CALENDAR=time_calendar, $
    NAN=nan_opt

;***********************************************************************
;Variables and Constants

; Number of months in a year
mina = 12

; Ensure an operation is defined
if not( keyword_set( operation ) ) then stop

; Ensure input data consistency
n_day = n_elements( time_data )
if n_day eq 0 then stop
if not( keyword_set( var_data ) ) then stop
if n_elements( var_data[*,0] ) ne n_day then stop
n_point = n_elements( var_data[0,*] )
if n_elements( time_units ) ne 1 then stop

; The default Gregorian calendar
if not( keyword_set( time_calendar ) ) then time_calendar = 'gregorian'
if n_elements( time_calendar ) ne 1 then stop
if strpos( strlowcase( time_calendar ), 'gregorian' ) ge 0 then begin
  leapyear_opt = 1
endif else begin
  leapyear_opt = 0
endelse

; Option for seasonal values
season_opt = keyword_set( season_len )
if ( season_opt eq 1 ) and ( n_elements( season_start ) ne 1 ) then stop

;***********************************************************************
; Perform conversion from daily to monthly format

; Produce a 'yyyymmdd' version of the time vector
if time_units eq 'yyyymmdd' then begin
  time_data_ymd = time_data
endif else begin
  time_data_ymd = convert_time_format( time_data, time_units, 'yyyymmdd', $
      calendar=time_calendar )
endelse
time_data_ym = strmid( time_data_ymd, 0, 6 )

; Build the output yyyymm time vector
id_month = uniq( time_data_ym )
out_time_data_ym = time_data_ym[id_month]
n_month = n_elements( id_month )

; A seasonal data flag, if needed
if season_opt eq 1 then flag_season = intarr( n_month )

; Iterate through months
for i_month = 0, n_month - 1 do begin
  ; Identify the time indices for days in this month
  if season_opt eq 0 then begin
    id_day = where( time_data_ym eq out_time_data_ym[i_month], n_id_day )
  ; Or identify the time indices for days in the season
  endif else begin
    ; If this month is not yet considered
    if flag_season[i_month] eq 0 then begin
      ; If this is the first month in a season
      temp = ( fix( strmid( out_time_data_ym[i_month], 4, 2 ) ) - 1 ) mod mina
      if temp eq ( season_start mod mina ) then begin
        ; Define the months to include
        index = i_month + indgen( season_len )
        ; If this series of months goes beyond the available data
        if i_month + season_len gt n_month then begin
          ; Do not do any more calculations
          n_id_day = 0
        ; Otherwise find the days
        endif else begin
          ; Identify days in input data
          id_day = where( isin( out_time_data_ym[index], time_data_ym ) eq 1, $
              n_id_day )
          if n_id_day eq 0 then stop
          ; Flag the months in this season as done
          flag_season[index] = 1
          flag_season[index[0]] = 2
        endelse
      ; Do not do any more calculations
      endif else begin
        n_id_day = 0
      endelse
    ; If this month was already included in a seasonal value
    endif else begin
      ; Do not do any more calculations
      n_id_day = 0
    endelse
  endelse
  ; If there is data for this month
  if n_id_day gt 0 then begin
    ; Perform the requested operation
    if max( operation eq [ 'average', 'mean' ] ) eq 1 then begin
      if n_point eq 1 then begin
        temp_data = mean( var_data[id_day], nan=nan_opt )
      endif else begin
        temp_data = mean( var_data[id_day,*], dimension=1, nan=nan_opt )
      endelse
    endif else if operation eq 'median' then begin
      temp_data = median( var_data[id_day,*], dimension=1 )
    endif else if operation eq 'mode' then begin
      temp_data = !values.f_nan * fltarr( n_id_day )
      for i_point = 0, n_point - 1 do begin
        temp_data[i_point] = mode( var_data[id_day,i_point], $
            exclude_nan=nan_opt, include_nan=1-nan_opt )
      endfor
    endif else if operation eq 'stddev' then begin
      temp_data = stddev( var_data[id_day,*], dimension=1, nan=nan_opt )
    endif else if operation eq 'total' then begin
      temp_data = total( var_data[id_day,*], 1, nan=nan_opt )
    endif else if operation eq 'none' then begin
      temp_data = var_data[id_day,*]
    endif else begin
      stop
    endelse
    ; If we are retaining daily data
    if operation eq 'none' then begin
      ; If this is the first season
      if not( keyword_set( out_var_data ) ) then begin
        n_day_0 = n_id_day
        if leapyear_opt eq 1 then begin
          if max( strmid( time_data_ymd[id_day], 4, 4 ) eq '0229' ) eq 0 $
              then begin
            if max( strmid( time_data_ymd, 4, 4 ) eq '0229' ) eq 1 then begin
              n_day_0 = n_day_0 + 1
            endif
          endif
        endif
        out_var_data = !values.f_nan * fltarr( n_day_0, 1, n_point )
        out_var_data[0:n_id_day-1,0,*] = reform( temp_data, n_id_day, 1, $
            n_point )
      ; If this is a subsequent season
      endif else begin
        temp_data = reform( temp_data, n_id_day, 1, n_point )
        if ( leapyear_opt eq 1 ) and ( n_id_day eq n_day_0 - 1 ) then begin
          temp_data = [ temp_data, !values.f_nan ]
        endif
        out_var_data = [ [ out_var_data ], [ temp_data ] ]
      endelse
    ; If we have performed an operation
    endif else begin
      ; If this is the first season
      if not( keyword_set( out_var_data ) ) then begin
        ;n_day_0 = 1
        out_var_data = !values.f_nan * fltarr( n_month, n_point )
      endif
      ; Record data
      out_var_data[i_month,*] = temp_data
    endelse
    ; If we have not performed an operation
    if operation eq 'none' then begin
      ; Extract time data
      temp_data = time_data_ymd[id_day]
      ; If we are to output a time vector with specified units
      if keyword_set( out_time_units ) then begin
        temp_data = convert_time_format( temp_data, 'yyyymmdd', $
            out_time_units, calendar=time_calendar )
      endif
      if ( leapyear_opt eq 1 ) and ( n_day_0 - n_id_day eq 1 ) then begin
        if var_type( temp_data ) eq 7 then begin
          temp_data = [ temp_data, '' ]
        endif else begin
          temp_data = [ temp_data, !values.f_nan ]
        endelse
      endif
      ; Initialise output time array
      if n_elements( out_time_data ) eq 0 then begin
        out_time_data = temp_data
      ; Or add to existing time array
      endif else begin
        out_time_data = [ [ out_time_data ], [ temp_data ] ]
      endelse
    ; If we have performed an operation
    endif else begin
      ; Set time to middle of month/season
      temp_units = 'days since ' + strmid (time_data_ymd[0], 0, 4 ) $
          + '-01-01T00:00:00'
      temp_data = convert_time_format( time_data_ymd[id_day] + '12', $
          'yyyymmddhh', temp_units, calendar=time_calendar )
      temp_data = mean( temp_data )
      if keyword_set( out_time_units ) then begin
        ; Add month to output time vector
        temp_data = convert_time_format( temp_data, temp_units, $
            out_time_units, calendar=time_calendar )
        if n_elements( out_time_data ) eq 0 then begin
          out_time_data = temp_data
        endif else begin
          out_time_data = [ out_time_data, temp_data ]
        endelse
        ; Record monthly date
      endif else begin
        if n_elements( out_time_data_ym ) eq 0 then begin
          out_time_data_ym = strarr( n_month )
        endif
        out_time_data_ym[i_month] = convert_time_format( temp_data, $
            temp_units, 'yyyymm', calendar=time_calendar )
      endelse
    endelse
  endif
endfor

; Restrict to annual entries for seasonal data
if ( season_opt eq 1 ) and ( operation ne 'none' ) then begin
  id = where( flag_season eq 2, n_month )
  if n_month eq 0 then stop
  if keyword_set( out_time_units ) then begin
    out_time_data = out_time_data[id]
  endif else begin
    out_time_data_ym = out_time_data_ym[id]
  endelse
  out_var_data = out_var_data[id,*]
endif

; Copy time vector and units to output
if keyword_set( out_time_units ) or ( operation eq 'none' ) then begin
  time_data = out_time_data
  if keyword_set( out_time_units ) then begin
    time_units = out_time_units
  endif else begin
    time_units = 'yyyymmdd'
  endelse
endif else begin
  time_data = out_time_data_ym
  out_time_units = 'yyyymm'
endelse

;***********************************************************************
; The end

return, out_var_data
END
