;+
; NAME:
;    ipccar6dna_region_mask.pro
;
; PURPOSE:
;    This function produces a gridded mask for use in the IPCC AR6 WGII 
;    detection and attribution analyses.
;
; CATEGORY:
;    IPCC AR6 WGII DA
;
; CALLING SEQUENCE:
;    result = ipccar6dna_region_mask( region_def=region_def )
;
; INPUT:
;    LAT, LON, MIN_SIZE, REGION_DEF
;
; KEYWORD PARAMETERS:
;    LAT:  An optional input float vector specifying the latitude coordinates 
;        of the target grid.  If not input, then this returns a default 
;        latitude dimension vector.
;    LON:  An optional input float vector specifying the longitude coordinates 
;        of the target grid.  If not input, then this returns a default 
;        longitude dimension vector.
;    MIN_SIZE:  An optional float scalar specifying the minimum size, in 
;        degrees, of each region in both the longitude and latitude 
;        dimensions.  If the region definition is smaller than this minimum in 
;        one of the dimensions, then the region is stretched in that dimension 
;        such that it is MIN_SIZE degrees wide.
;    REGION_DEF:  A required "&"-delimited scalar string or vector string 
;        defining the region to load.  Supported formats are:
;        * 'box=<southwestern>;<northeastern>':  A longitude-latitude box, 
;          where the southwestern corner is defined in comma-delimited form, 
;          such as <southwestern>='40E,10N' or '10N,40E', and the 
;          northeastern corner <northeastern> is defined in the same way.  
;          The 'N', 'S', 'E', and 'W' directions are required.
;        * 'FIPS-1=<code>':  The FIPS-1 code <code> of a provincial-level 
;          administrative unit.
;        * 'HASC-1=<code>':  The HASC-1 code <code> of a provincial-level 
;          administrative unit.
;        * 'ISO-0=<code>':  The three-character ISO-0 code <code> of a country.
;        * 'NAME-0=<name>':  The standard name <name> of a country.
;        * 'NAME-1=<name>':  The standard name <name> of a provincial-level 
;          administrative unit.
;        * 'WRAF<size>-v4-1=<id>':  The <size>Mm2 WRAF v4.1 region with 
;          identifier <id>, e.g. 'WRAF05-v4-1=1.1.1.1' for Yukon.  See Stone 
;          (2019, 10.1007/s10584-019-02479-6).
;
; OUTPUT:
;    RESULT (MASK_DATA):  A float array containing the longitude-latitude mask 
;        for the region.  Values range from 0 (not part of region) to 1 
;        (entirely part of region).
;    LAT, LON
;
; USES:
;    ne_10m_admin_0_countries.nc
;    ne_10m_admin_1_states_provinces.nc
;    extract_region.pro
;    mask_lonlattime.pro
;    netcdf_read.pro
;    netcdf_read_geo.pro
;    plus.pro
;    string_substitute.pro
;
; PROCEDURE:
;    This procedure interprets region definitions in order to produce a gridded 
;    region mask array.
;
; EXAMPLE:
;    See ipccar6dna_conf_load_data.pro.
;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2020-09-25
;    Modified:  DAS, 2020-11-30 (Corrected inclusion of all-land)
;    Modified:  DAS, 2020-12-03 (Added flexibility to multi-region-definition 
;        formatting)
;    Modified:  DAS, 2020-12-07 (Added standardisation of longitudes when 
;        comparing against target longitude)
;    Modified:  DAS, 2021-03-01 (Added FIPS-1 region definition capability;  
;        Fixed issue with provincial-level coordinates;  Added MIN_SIZE keyword 
;        input)
;    Modified:  DAS, 2021-12-20 (Added documentation)
;-

;***********************************************************************

FUNCTION IPCCAR6DNA_REGION_MASK, $
    LAT=lat_data, LON=lon_data, $
    MASK_FILE=mask_file, $
    MIN_SIZE=min_size, $
    REGION_DEF=region_def

; Builds a longitude-latitude mask array from one or more region definitions.
;
; Keywords:
;   LAT, LON:  Target grid coordinate vectors.  If not set, they are read from 
;       the national administrative file and returned to the caller.  Note 
;       that LON is modified in place (wrapped into [-180,180]) whenever an 
;       administrative-territory definition is processed.
;   MASK_FILE:  Accepted for interface compatibility;  not used by the current 
;       implementation.
;   MIN_SIZE:  Minimum extent (degrees) applied to 'box' definitions.
;   REGION_DEF:  Required.  Either a string vector with one component region 
;       per element, or a scalar string with component regions delimited by 
;       '&'.  Within a component, '/' separates definitions whose masks are 
;       multiplied together.
;
; Returns:
;   A float array of size [n_lon,n_lat] with values capped at 1.
;
; Errors:
;   Invalid or unresolvable definitions halt execution with STOP, following 
;   the convention used throughout this routine.

;***********************************************************************
; Options and constants

; The default administrative division file names
; (IPCCAR6DNA_DATA is expected to end with a directory separator, because the 
; sub-paths are appended directly)
spawn, 'echo ${IPCCAR6DNA_DATA}', dir_data
dir_data = dir_data[0]
iso0_file = dir_data + 'nation/Natural_Earth/fixed/Admin0_v3.1.0_10km/' $
    + 'ne_10m_admin_0_countries.nc'
hasc1_file = dir_data + 'nation/Natural_Earth/fixed/Admin1_v3.0.0_10km/' $
    + 'ne_10m_admin_1_states_provinces.nc'
; The default WRAF v4.1 mask files
wraf_mask_dir = dir_data + 'WRAF/v4-1/fx/atmos/region/4-1-0/'

; The default map grid (taken from the national administrative file)
if not( keyword_set( lon_data ) ) then begin
  temp = netcdf_read_geo( iso0_file, '', lon=lon_data )
endif
n_lon = n_elements( lon_data )
d_lon = lon_data[1] - lon_data[0]
if not( keyword_set( lat_data ) ) then begin
  temp = netcdf_read_geo( iso0_file, '', lat=lat_data )
endif
n_lat = n_elements( lat_data )
d_lat = lat_data[1] - lat_data[0]

; The number of distinct component regions.
; A scalar definition is split on '&';  a vector definition is taken as 
; already split.
n_region = n_elements( region_def )
if n_region eq 0 then stop
if n_region eq 1 then begin
  region_def_split = strsplit( region_def[0], '&', extract=1, count=n_region )
endif else begin
  region_def_split = region_def
endelse
if max( region_def_split eq '' ) eq 1 then stop

;***********************************************************************
; Generate mask

; Load the national administrative area data if needed
if ( max( strpos( region_def_split, 'ISO-0=' ) ) ge 0 ) $
    or ( max( strpos( region_def_split, 'NAME-0=' ) ) ge 0 ) then begin
  ; Load the data
  iso0_data = netcdf_read_geo( iso0_file, 'nation', lon=iso0_lon, $
      lat=iso0_lat )
  iso0_data = round( reform( iso0_data ) )
  iso0_label = string( netcdf_read( iso0_file, 'iso0' ) )
  iso0_name = string( netcdf_read( iso0_file, 'name0' ) )
  n_iso0_lon = n_elements( iso0_lon )
  n_iso0_lat = n_elements( iso0_lat )
endif
; Load the provincial administrative area data if needed
if ( max( strpos( region_def_split, 'HASC-1=' ) ) ge 0 ) $
    or ( max( strpos( region_def_split, 'FIPS-1=' ) ) ge 0 ) $
    or ( max( strpos( region_def_split, 'NAME-1=' ) ) ge 0 ) then begin
  ; Load the data
  hasc1_data = netcdf_read_geo( hasc1_file, 'nation', lon=hasc1_lon, $
      lat=hasc1_lat )
  hasc1_data = round( reform( hasc1_data ) )
  hasc1_label = string( netcdf_read( hasc1_file, 'hasc1' ) )
  hasc1_fips = string( netcdf_read( hasc1_file, 'fips1' ) )
  hasc1_name = string( netcdf_read( hasc1_file, 'name1' ) )
  n_hasc1_lon = n_elements( hasc1_lon )
  n_hasc1_lat = n_elements( hasc1_lat )
endif
; The interpolation step below uses a single set of administrative grid 
; coordinates (iso0_lon, iso0_lat).  If both levels are loaded then confirm 
; they share a grid;  if only the provincial level is loaded then adopt its 
; coordinates.
if keyword_set( n_hasc1_lon ) then begin
  if keyword_set( n_iso0_lon ) then begin
    if n_hasc1_lon ne n_iso0_lon then stop
    if n_hasc1_lat ne n_iso0_lat then stop
    if hasc1_lon[0] ne iso0_lon[0] then stop
    if hasc1_lat[0] ne iso0_lat[0] then stop
  endif else begin
    n_iso0_lon = n_hasc1_lon
    n_iso0_lat = n_hasc1_lat
    iso0_lon = hasc1_lon
    iso0_lat = hasc1_lat
  endelse
endif

; Initialise mask array
mask_data = fltarr( n_lon, n_lat )

; Iterate through component regions (these are added together)
for i_region = 0, n_region - 1 do begin
  ; Split overlapping definitions (e.g. box and land).
  ; Without the REGEX keyword, each character in '/&' acts as a delimiter.
  temp_region_def = strsplit( region_def_split[i_region], '/&', extract=1, $
      count=n_temp_region_def )
  ; Initialise temporary mask (overlapping definitions are multiplied in)
  temp_mask_data = 1. + fltarr( n_lon, n_lat )
  ; Iterate through definitions
  for i_def = 0, n_temp_region_def - 1 do begin
    ; Split into type and specifics
    temp_def = strsplit( temp_region_def[i_def], '=', extract=1, $
        count=n_temp_def )
    if n_temp_def ne 2 then stop
    temp_def = strtrim( temp_def, 2 )
    ; If this is a rectangular polar-grid box
    if temp_def[0] eq 'box' then begin
      ; Determine limits (the two corners)
      temp_limit = strsplit( temp_def[1], ';', extract=1, $
          count=n_temp_limit )
      if n_temp_limit ne 2 then stop
      ; Convert to standard extract_region.pro format by matching corners and 
      ; cardinal directions.  The output ordering is 
      ; [corner-0 lon, corner-0 lat, corner-1 lon, corner-1 lat].
      temp_limit_new = !values.f_nan * fltarr( 4 )
      temp_cardinal = [ 'W', 'E', 'S', 'N' ]
      temp_index= [ 0, 0, 1, 1 ]
      temp_sign = [ -1, 1, -1, 1 ]
      for i_corner = 0, 1 do begin
        temp_corner = strsplit( temp_limit[i_corner], ',', extract=1, $
            count=n_temp_corner )
        if n_temp_corner ne 2 then stop
        for i_cardinal = 0, n_elements( temp_cardinal ) - 1 do begin
          id = where( strpos( temp_corner, temp_cardinal[i_cardinal] ) ge 0, $
              n_id )
          for i_id = 0, n_id - 1 do begin
            temp_limit_new[i_corner*2+temp_index[i_cardinal]] $
                = temp_sign[i_cardinal] $
                * float( string_substitute( temp_corner[id[i_id]], $
                temp_cardinal[i_cardinal] ) )
          endfor
        endfor
      endfor
      ; If no entries have been added, then assume S-most,W-most;N-most,E-most 
      ; format
      id = where( finite( temp_limit_new ) eq 1, n_id )
      if n_id eq 0 then begin
        ; temp_limit holds only the two corner strings at this point, so 
        ; flatten both corners into four values before re-ordering
        temp_limit_flat = strsplit( strjoin( temp_limit, ',' ), ',', $
            extract=1, count=n_temp_limit_flat )
        if n_temp_limit_flat ne 4 then stop
        temp_limit = float( temp_limit_flat[[1,0,3,2]] )
      endif else if n_id ne 4 then begin
        stop
      endif else begin
        temp_limit = temp_limit_new
      endelse
      ; Enlarge to minimum size (in degrees) if necessary, keeping the box 
      ; centre fixed
      if keyword_set( min_size ) then begin
        if temp_limit[2] - temp_limit[0] lt min_size then begin
          temp_limit[[0,2]] = mean( temp_limit[[0,2]] ) $
              + [ -0.5, 0.5 ] * min_size
        endif
        if temp_limit[3] - temp_limit[1] lt min_size then begin
          temp_limit[[1,3]] = mean( temp_limit[[1,3]] ) $
              + [ -0.5, 0.5 ] * min_size
        endif
      endif
      ; Impose box on mask array
      temp = extract_region( region=temp_limit, lon=lon_data, lat=lat_data, $
          noshrink=1, coverage=temp_mask_data )
    ; If this is an administrative territory code
    endif else if max( $
        temp_def[0] eq [ 'ISO-0', 'NAME-0', 'HASC-1', 'FIPS-1', 'NAME-1' ] ) $
        eq 1 then begin
      ; Extract the requested codes or names
      temp_code = strsplit( temp_def[1], ',', extract=1, count=n_temp_code )
      ; If all territories (i.e. all land) are requested
      if ( n_temp_code eq 1 ) and ( temp_code[0] eq 'all' ) then begin
        if max( temp_def[0] eq [ 'ISO-0', 'NAME-0' ] ) eq 1 then begin
          temp_mask = plus( iso0_data + 1 )
        endif else if max( temp_def[0] eq [ 'HASC-1', 'FIPS-1', 'NAME-1' ] ) $
            eq 1 then begin
          temp_mask = plus( hasc1_data + 1 )
        endif else begin
          stop
        endelse
      ; If the absence of territories (i.e. all ocean) is required
      endif else if ( n_temp_code eq 1 ) and ( temp_code[0] eq 'none' ) $
          then begin
        if max( temp_def[0] eq [ 'ISO-0', 'NAME-0' ] ) eq 1 then begin
          temp_mask = 1 - plus( iso0_data + 1 )
        endif else if max( temp_def[0] eq [ 'HASC-1', 'FIPS-1', 'NAME-1' ] ) $
            eq 1 then begin
          temp_mask = 1 - plus( hasc1_data + 1 )
        endif else begin
          stop
        endelse
      ; Otherwise if individual territories are specified
      endif else begin
        ; Initialise output mask
        temp_mask = fltarr( n_iso0_lon, n_iso0_lat )
        ; Iterate through territories
        for i_code = 0, n_temp_code - 1 do begin
          ; Identify where this territory is in the relevant label list
          if temp_def[0] eq 'ISO-0' then begin
            id_code = where( iso0_label eq temp_code[i_code], n_id_code )
          endif else if temp_def[0] eq 'NAME-0' then begin
            id_code = where( iso0_name eq temp_code[i_code], n_id_code )
          endif else if temp_def[0] eq 'HASC-1' then begin
            id_code = where( hasc1_label eq temp_code[i_code], n_id_code )
          endif else if temp_def[0] eq 'FIPS-1' then begin
            id_code = where( hasc1_fips eq temp_code[i_code], n_id_code )
          endif else if temp_def[0] eq 'NAME-1' then begin
            id_code = where( hasc1_name eq temp_code[i_code], n_id_code )
          endif else begin
            stop
          endelse
          ; Require exactly one match
          if n_id_code ne 1 then stop
          ; Gridded values are compared against the list index plus one
          if max( temp_def[0] eq [ 'ISO-0', 'NAME-0' ] ) eq 1 then begin
            id = where( iso0_data eq id_code[0] + 1, n_id )
          endif else if max( temp_def[0] eq [ 'HASC-1', 'FIPS-1', 'NAME-1' ] ) $
              eq 1 then begin
            id = where( hasc1_data eq id_code[0] + 1, n_id )
          endif else begin
            stop
          endelse
          if n_id eq 0 then stop
          temp_mask[id] = 1
        endfor
      endelse
      ; Interpolate to requested grid.
      ; First wrap both longitude vectors into [-180,180] so that they can be 
      ; compared (this modifies the LON keyword variable in place).
      id = where( lon_data gt 180., n_id )
      if n_id gt 0 then lon_data[id] = lon_data[id] - 360.
      id = where( iso0_lon gt 180., n_id )
      if n_id gt 0 then iso0_lon[id] = iso0_lon[id] - 360.
      id = where( lon_data lt -180., n_id )
      if n_id gt 0 then lon_data[id] = lon_data[id] + 360.
      id = where( iso0_lon lt -180., n_id )
      if n_id gt 0 then iso0_lon[id] = iso0_lon[id] + 360.
      ; Skip the interpolation if the grids agree to within 1% of the grid 
      ; spacing
      temp_flag = 1
      if ( n_lon eq n_iso0_lon ) and ( n_lat eq n_iso0_lat ) then begin
        if ( max( abs( lon_data - iso0_lon ) ) / d_lon lt 0.01 ) $
            and ( max( abs( lat_data - iso0_lat ) ) / d_lat lt 0.01 ) $
            then begin
          temp_flag = 0
        endif
      endif
      if temp_flag eq 1 then begin
        ; Pass copies of the coordinates to mask_lonlattime.pro
        temp_lon = iso0_lon
        temp_lat = iso0_lat
        mask_lonlattime, temp_mask, lon=temp_lon, lat=temp_lat, $
            mask_lon=lon_data, mask_lat=lat_data
      endif
      ; Apply this layer of mask
      temp_mask_data = temp_mask_data * temp_mask
    ; If this is a WRAF region level
    endif else if max( $
        temp_def[0] eq 'WRAF' + [ '10', '5', '2', '05', '01' ] + '-v4-1' ) $
        eq 1 then begin
      ; Convert the mask directory into the region mask directory for this 
      ; WRAF level, and require exactly one NetCDF file in it
      temp_mask_file = string_substitute( wraf_mask_dir, '/atmos/', $
          '/atmos-'+temp_def[0]+'/', regex=1 )
      temp_mask_file = file_search( temp_mask_file+'*.nc', count=n_temp )
      if n_temp ne 1 then stop
      ; Load region mask data.
      ; NOTE(review): the region data are used on their native grid and are 
      ; not interpolated to LON/LAT here -- confirm the grids are expected to 
      ; match.
      temp_region_data = netcdf_read_geo( temp_mask_file[0], 'region', $
          lon=temp_region_lon, lat=temp_region_lat, $
          realization=temp_region_id, $
          label_in_realization=['regionid,layer'] )
      temp_region_data = reform( temp_region_data )
      ; Identify specified regions
      temp_code = strsplit( temp_def[1], ',', extract=1 )
      id_region = where( isin( temp_code, temp_region_id ) eq 1, n_id_region )
      ; where() returns -1 when nothing matches, which must not be used as a 
      ; region index
      if n_id_region eq 0 then stop
      id_mask = where( isin( float( id_region ), temp_region_data ) eq 1, $
          n_id_mask )
      temp_region_data[*] = 0
      ; Only flag cells if some were found (avoid subscripting with -1)
      if n_id_mask gt 0 then temp_region_data[id_mask] = 1
      temp_mask_data = temp_mask_data * temp_region_data
    ; If this is some other method
    endif else begin
      ; Not yet implemented
      stop
    endelse
  endfor
  ; Add this component region to mask
  mask_data = mask_data + temp_mask_data[*,*,0]
endfor

; Ensure maximum value of 1 (component regions may overlap)
id = where( mask_data gt 1, n_id )
if n_id gt 0 then mask_data[id] = 1.

;***********************************************************************
; The end

return, mask_data
END
