;+
; NAME:
;    ipccar6dna_define_source.pro
;
; PURPOSE:
;    This procedure sets the lists of data sources to be used in the IPCC AR6 
;    WGII detection and attribution analyses.
;
; CATEGORY:
;    IPCC AR6 WGII DA
;
; CALLING SEQUENCE:
;    ipccar6dna_define_source, var_label=var_label, $
;        source_headers=source_headers, source_settings=source_settings
;
; INPUT:
;    DOMAIN_LABEL, REALM_LABEL, SELECT_VALUES, VAR_LABEL
;
; KEYWORD PARAMETERS:
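;    DOMAIN_LABEL:  An optional scalar string specifying the domain label 
;        (e.g. 'Amon' or 'Omon') to be substituted for '${DOMAIN_LABEL}' in 
;        the source settings.  If neither DOMAIN_LABEL nor REALM_LABEL is 
;        input, then this defaults to 'Amon' for VAR_LABEL='pr' or 'tas' and 
;        to 'Omon' for VAR_LABEL='tos'.
;    REALM_LABEL:  An optional scalar string specifying the climate realm of 
;        the climate variable VAR_LABEL.  This is usually determined 
;        automatically, but for instance if VAR_LABEL='nobs' then it cannot be 
;        automatically determined and REALM_LABEL needs to be input.  Possible 
;        values are 'atmos' and 'ocean'.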
;    SELECT_VALUES:  The optional SELECT_VALUES keyword input for 
;        markup_read.pro.  A string vector containing a list of category/tag 
;        values for which to return the entries.  Elements must be of the form 
;        'header=value' or 'header\=value' (equal and not equal respectively).  
;        For instance, in order to only return entries from the CMIP6 project, 
;        enter 'MIP_label=CMIP6'.
;    SOURCE_HEADERS:  Returns the N_HEADER labels for the variables returned in 
;        the first dimension of SOURCE_SETTINGS.
;    SOURCE_SETTINGS:  Returns the values for the various variables and 
;        sources.  A string array of size N_HEADER,N_SETTING.
;    VAR_LABEL:  A required scalar string specifying the CMOR label of the 
;        climate variable of interest, e.g. 'pr' for precipitation and 'tas' 
;        for temperature.  This is required in order to specify the data files.
;
; OUTPUT:
;    SOURCE_HEADERS, SOURCE_SETTINGS
;
; USES:
;    $IPCCAR6DNA_DATA
;    $IPCCAR6DNA_IDL
;    ipccar6dna_map_source.xml
;    markup_read.pro
;    string_substitute.pro
;
; PROCEDURE:
;    This procedure reads the settings specified in ipccar6dna_map_source.xml.
;
; EXAMPLE:
;    export IPCCAR6DNA_IDL=$HOME/idl/papers/IPCC-AR6/
;    export IPCCAR6DNA_DATA=$HOME/data/
;    ipccar6dna_define_source, var_label='tas', $
;        select_values='MIP_label=CMIP6', source_headers=source_headers, $
;        source_settings=source_settings
;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2014-10-06 (as 
;        hanseng_set_source.pro)
;    Modified:  DAS, 2020-08-23 (Converted to ipccar6dna_load_source.pro)
;    Modified:  DAS, 2020-12-02 (Added DOMAIN_LABEL keyword input and variable)
;    Modified:  DAS, 2021-01-07 (Added 'active_file' to list of settings to be 
;        specified)
;-

;***********************************************************************

PRO IPCCAR6DNA_DEFINE_SOURCE, $
    REALM_LABEL=realm_label, DOMAIN_LABEL=domain_label, $
    SELECT_VALUES=select_values, $
    VAR_LABEL=var_label, $
    SOURCE_HEADERS=source_headers, SOURCE_SETTINGS=source_settings

;***********************************************************************
; Overview
;
; Reads the source list from ipccar6dna_map_source.xml (located in the 
; directory given by $IPCCAR6DNA_IDL) using markup_read.pro, replaces the 
; ${DIR_DATA}, ${VAR_LABEL}, ${REALM_LABEL}, and ${DOMAIN_LABEL} placeholders 
; in the returned settings, and keeps only those sources whose 'var_label' 
; entry is either empty or contains VAR_LABEL in its comma-delimited list.
;
; Keywords:
;   VAR_LABEL (input, required):  Scalar string variable label.
;   REALM_LABEL (input, optional):  Scalar string realm label.  If not input 
;       then it is inferred from DOMAIN_LABEL ('Amon' -> 'atmos', 
;       'Omon' -> 'ocean') or else from VAR_LABEL.
;   DOMAIN_LABEL (input, optional):  Scalar string domain label.  If not 
;       input then it is inferred from REALM_LABEL ('atmos' -> 'Amon', 
;       'ocean' -> 'Omon') or else from VAR_LABEL.
;   SELECT_VALUES (input, optional):  Passed unchanged to the SELECT_VALUES 
;       keyword of markup_read.pro.
;   SOURCE_HEADERS (output):  The HEADERS output of markup_read.pro.
;   SOURCE_SETTINGS (output):  The SETTINGS output of markup_read.pro after 
;       placeholder substitution and restriction to VAR_LABEL.
;
; Errors are reported by halting with STOP and a message.

;***********************************************************************
; Constants

; Prefix for all error messages issued by this procedure
error_prefix = 'ERROR ipccar6dna_define_source.pro:  '

; Get the IPCC AR6 D&A IDL directory
spawn, 'echo ${IPCCAR6DNA_IDL}', dir_idl
dir_idl = dir_idl[0]
if dir_idl eq '' then begin
  temp = error_prefix + '$IPCCAR6DNA_IDL environment variable not set'
  stop, temp
endif
; Ensure the directory ends with a separator, because the file name is 
; appended directly below
if strmid( dir_idl, strlen( dir_idl ) - 1, 1 ) ne '/' then begin
  dir_idl = dir_idl + '/'
endif
; Get the IPCC AR6 D&A data directory
spawn, 'echo ${IPCCAR6DNA_DATA}', dir_data
dir_data = dir_data[0]
if dir_data eq '' then begin
  temp = error_prefix + '$IPCCAR6DNA_DATA environment variable not set'
  stop, temp
endif

; The name of the file containing the source list
file_list = dir_idl + 'ipccar6dna_map_source.xml'
if file_test( file_list ) eq 0 then begin
  temp = error_prefix + 'source list file not found:  ' + file_list
  stop, temp
endif

; Ensure required input
if not( keyword_set( var_label ) ) then begin
  temp = error_prefix + 'VAR_LABEL is required but was not input'
  stop, temp
endif

; Determine the realm and domain implied by the variable label (an empty 
; string means that the variable label does not imply them)
default_realm = ''
default_domain = ''
if max( var_label eq [ 'pr', 'tas' ] ) eq 1 then begin
  default_realm = 'atmos'
  default_domain = 'Amon'
endif else if var_label eq 'tos' then begin
  default_realm = 'ocean'
  default_domain = 'Omon'
endif

; Fill in a missing realm label.
; REALM_LABEL and DOMAIN_LABEL are defaulted independently, so that 
; supplying only one of them (e.g. REALM_LABEL when VAR_LABEL='nobs') does 
; not leave the other undefined for the substitutions below.  An explicitly 
; input label takes precedence over the value implied by VAR_LABEL.
if not( keyword_set( realm_label ) ) then begin
  temp_realm = default_realm
  if keyword_set( domain_label ) then begin
    if domain_label eq 'Amon' then temp_realm = 'atmos'
    if domain_label eq 'Omon' then temp_realm = 'ocean'
  endif
  if temp_realm eq '' then begin
    temp = error_prefix + 'cannot determine REALM_LABEL for VAR_LABEL=' $
        + var_label + ', please input REALM_LABEL'
    stop, temp
  endif
  realm_label = temp_realm
endif
; Fill in a missing domain label (REALM_LABEL is defined at this point)
if not( keyword_set( domain_label ) ) then begin
  temp_domain = default_domain
  if realm_label eq 'atmos' then temp_domain = 'Amon'
  if realm_label eq 'ocean' then temp_domain = 'Omon'
  if temp_domain eq '' then begin
    temp = error_prefix + 'cannot determine DOMAIN_LABEL for VAR_LABEL=' $
        + var_label + ', please input DOMAIN_LABEL'
    stop, temp
  endif
  domain_label = temp_domain
endif

;***********************************************************************
; Load the source list

; Read the source list from file
select_headers = [ 'file_sftlf', 'hist-all_file', 'hist-nat_file', $
    'institute_label', 'MIP_label', 'noise_file', 'resolution_atmos', $
    'resolution_ocean', 'source_label', 'source_name', 'var_label', $
    'domain_label', 'observed_file', 'active_file' ]
markup_read, file_list, comment_char=';', select_headers=select_headers, $
    select_values=select_values, settings=source_settings, $
    headers=source_headers

; Substitute data directory string
source_settings = string_substitute( source_settings, '${DIR_DATA}', dir_data, $
    regex=1, robust=1 )
; Substitute variable label
source_settings = string_substitute( source_settings, '${VAR_LABEL}', $
    var_label, regex=1, robust=1 )
; Substitute realm label
source_settings = string_substitute( source_settings, '${REALM_LABEL}', $
    realm_label, regex=1, robust=1 )
; Substitute domain label
source_settings = string_substitute( source_settings, '${DOMAIN_LABEL}', $
    domain_label, regex=1, robust=1 )

; Restrict according to availability of variable VAR_LABEL.
; The column is located in the returned SOURCE_HEADERS rather than in the 
; requested SELECT_HEADERS, because SOURCE_HEADERS is what labels the first 
; dimension of SOURCE_SETTINGS.
id_var = where( source_headers eq 'var_label', n_id_var )
if n_id_var ne 1 then begin
  temp = error_prefix + 'expected exactly one var_label header in ' $
      + file_list
  stop, temp
endif
; Wrap both the list and the label in commas so that only whole entries of 
; the comma-delimited list match (e.g. 'tas' does not match 'tasmax')
temp_pos = strpos( ',' + source_settings[id_var[0],*] + ',', $
    ',' + var_label + ',' )
; An empty var_label entry is kept as well as a matching one
id = where( ( source_settings[id_var[0],*] eq '' ) or ( temp_pos ge 0 ), $
    n_source )
if n_source eq 0 then begin
  temp = error_prefix + 'no sources found for VAR_LABEL=' + var_label
  stop, temp
endif
source_settings = source_settings[*,id]

;***********************************************************************
; The End

return
END
