;+
; NAME:
;    ipccar6dna_conf_density.pro
;
; PURPOSE:
;     This function calculates the threshold station density required for 
;     confidence not to be reduced in ipccar6dna_conf.pro, and the consequent 
;     decrease in confidence if appropriate.
;
; CATEGORY:
;    IPCC AR6 WGII DA
;
; CALLING SEQUENCE:
;    result = ipccar6dna_conf_density( domain_label=domain_label, $
;        period=period, region_def=region_def, season_id=season_id, $
;        season_len=season_len, var_label=var_label )
;
; INPUT:
;    DOMAIN_LABEL, FRAC_ACTIVE_THRESH, N_LONLAT_MIN, PERIOD, REGION_DEF, 
;      REGION_LAT, REGION_LON, REGION_MASK, SEASON_ID, SEASON_LEN, SEED, 
;      SOURCE, VAR_LABEL
;
; KEYWORD PARAMETERS:
;    DOMAIN_LABEL:  A required scalar string specifying the domain over which 
;        the variable is analysed.  Possible values are:
;        * "atmos-land" for atmosphere over land only
;        * "atmos-ocean" for atmosphere over ocean only
;        * "ocean" for ocean only
;    FRAC_ACTIVE_THRESH:  An optional scalar float specifying the effective 
;        minimum fractional temporal coverage required for a station to be 
;        considerd in the analysis.  Note that there are assumptions on the 
;        sequencing of stations coming online and going offline (last on is 
;        also first off) that influence the interpretation of this value.  The 
;        default is just to require that a station is active at some point.
;    N_LONLAT_MIN:  An optional scalar integer specifying the minimum number of 
;        grid cells in the computational grid per longitude/latitude dimension 
;        for use when degrading the resolution for computational efficiency.  
;        If the region is very large, then fine scale random placement of a 
;        station within the mask (e.g. making making sure it falls on land and 
;        determining whether it falls within the region) does not matter 
;        numerically;  decreasing the resolution makes everything run faster.  
;        Note that grid dimensions will not be reduced to lower than the 
;        resolution of the station count data.  Also note that this input 
;        should be used carefully if the region is highly non-contiguous (i.e. 
;        when most of the area is empty space (e.g. France).  The default is so 
;        large that resoluton degradation does not happen.
;    PERIOD:  A required 2-element vector integer specifying the start year 
;        and end year, respectively, of the period to analyse.
;    REGION_DEF:  A required string specifying the region definition.  See 
;        ipccar6dna_region_mask.pro for details.
;    REGION_LAT:  A float vector of length N_REGION_LAT specifying the latitude 
;        dimension of REGION_MASK.  Required if REGION_MASK is set.
;    REGION_LON:  A float vector of length N_REGION_LON specifying the 
;        longitude dimension of REGION_MASK.  Required if REGION_MASK is set.
;    REGION_MASK:  A float array of size N_REGION_LON,N_REGION_LAT defining 
;        the regional mask to apply to the data.  Values should be in the [0,1] 
;        range.  Required if REGION_DEF is not input, otherwise ignored.
;    SEASON_ID:  A required scalar integer specifying the index of the middle 
;        (or first middle) month of the season to output.  See 
;        month_to_seasons.pro for details.  Note SEASON_ID=5, SEASON_LEN=12 
;        returns January-December annual means.
;    SEASON_LEN:  A required scalar integer specifying the number of months in 
;        the season to be extracted.  See months_to_seasons.pro for details.
;    SEED:  An optional scalar integer specifying the seed for the random 
;        number generator behind random placement of the stations within a 
;        grid cell.
;    SOURCE_LABEL:  A required scalar string specifying the observational data 
;        source to use for estimating station density.
;    VAR_LABEL:  A required scalar string specifying the CMOR label of the 
;        climate variable to analyse.  Supported values are 'pr', 'tas', 'tos'.
;
; OUTPUT:
;    RESULT:  Returns a scalar float containing the confidence adjustment 
;        factor due to station density and coverage underlying and 
;        observational data product.
;
; USES:
;    add_dim.pro
;    extract_region.pro
;    factors.pro
;    geo_dist.pro
;    geo_mean.pro
;    ipccar6dna_define_source.pro
;    ipccar6dna_region_mask.pro
;    mask_lonlattime.pro
;    netcdf_read_geo.pro
;    netcdf_read_geo_multitime.pro
;
; PROCEDURE:
;    Stone, D. A., and G. Hansen. 2016. Rapid systematic assessment of the 
;      detection and attribution of regional anthropogenic climate change. 
;      Climate Dynamics, 10.1007/s00382-015-2909-2.
;
; EXAMPLE:
;    See ipccar6dna_conf.pro.
;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2015-03-23 (As 
;        hanseng_density.pro)
;    Modified:  DAS, 2020-10-29 (Adapted from hanseng_density.pro)
;    Modified:  DAS, 2020-11-20 (Added automatic early return if no stations 
;        available)
;    Modified:  DAS, 2020-12-11 (Updated station density source products;  
;        Corrected error in time indexing of var_accounted)
;    Modified:  DAS, 2021-01-11 (Added SOURCE_LABEL keyword input;  added 
;        capability to work with specified SOURCE_LABEL, including if station 
;        density is recorded in the active variable)
;    Modified:  DAS, 2021-01-17 (Fixed bugs in region extraction;  Fixed 
;        bug in identification of stations within range of mask grid cell)
;    Modified:  DAS, 2021-03-15 (Made final region_factor calculation code more 
;        efficient with memory)
;    Modified:  DAS, 2021-04-08 (Added quiet=1 option to 
;        netcdf_read_geo_multitie calls)
;    Modified:  DAS, 2021-04-15 (Corrected time counter when season is not 
;        annual)
;    Modified:  DAS, 2021-12-23 (Completed documentation)
;-

;***********************************************************************

FUNCTION IPCCAR6DNA_CONF_DENSITY, $
    DOMAIN_LABEL=domain_label, $
    FRAC_ACTIVE_THRESH=frac_active_thresh, $
    N_LONLAT_MIN=n_lonlat_min, $
    PERIOD=period, $
    REGION_DEF=region_def, $
    REGION_MASK=region_data, REGION_LON=region_lon, REGION_LAT=region_lat, $
    SEASON_ID=season_id, SEASON_LEN=season_len, $
    SEED=seed, $
    SOURCE_LABEL=source_label, $
    VAR_LABEL=VAR_LABEL

;***********************************************************************
; Options and constants

diag_time_0 = systime( /seconds )
print,'...density a'
; Only allow one variable to be input
if n_elements( var_label ) ne 1 then stop
; Require a source to be specified
if n_elements( source_label ) ne 1 then stop

; Degrees to radians factor
degrad = !pi / 180.
; Months in a year
mina = 12
; Earth's radius (km)
r_earth = 6370.95

; Define the spatial decorrelation scale (in km)
if var_label eq 'pr' then dist_decorr = 450.
if var_label eq 'tas' then dist_decorr = 1200.
if var_label eq 'tos' then dist_decorr = 1200.
if not( keyword_set( dist_decorr ) ) then stop

; The distance at which stations can be considered usable, in units of 
; dist_decorr
range_take_stat = 1.
dist_use_max = range_take_stat * dist_decorr

; A cap on the possible number of stations per source grid cell.  This ensures
; that the numerics remain stable, and that the procedure runs reasonably
; quickly.  The difference between having this number of stations and complete
; coverage ends up being minor for the final result.
n_stat_cell_max = 25

; A limit on the number of grid cells in the mask arrays.  If the region is 
; very large, then fine scale random placement of a station within the mask 
; (e.g. making making sure it falls on land and determining whether it falls 
; within the region) does not matter numerically;  decreasing the resolution 
; makes everything run faster.
; Note that mask dimensions will not be reduced to these if the station count 
; array is of higher resolution.
; Note this should be used carefully if the region is highly non-contiguous 
; (e.g. France).
if not( keyword_set( n_lonlat_min ) ) then n_lonlat_min = 100000l
n_mask_lon_0 = n_lonlat_min
n_mask_lat_0 = n_lonlat_min / 2

; Determine the station-counting/activity file and specifics
nobs_select = 'source_label=' + source_label
ipccar6dna_define_source, var_label=var_label, select_values=nobs_select, $
    source_headers=nobs_source_headers, source_settings=nobs_source_settings, $
    realm_label=nobs_realm_label
id = where( nobs_source_headers eq 'active_file', n_id )
nobs_file = nobs_source_settings[id,*]
if n_elements( nobs_file ) ne 1 then stop
nobs_file = nobs_file[0]
if strpos( nobs_file, 'active-'+var_label ) ge 0 then begin
  nobs_var_label = 'active'
endif else if strpos( nobs_file, 'nstation-'+var_label ) ge 0 then begin
  nobs_var_label = 'nstation'
endif else if nobs_file eq '' then begin
  ; This source lacks the relevant information, so we will flag it as such
  return, !values.f_nan
endif else begin
  stop
endelse

; Ensure season details are input
if n_elements( season_id ) ne 1 then stop
if n_elements( season_len ) ne 1 then stop

;***********************************************************************
; Load data

; Load land-sea mask
if domain_label eq 'atmos-land' then begin
  temp_region_def = 'ISO-0=all'
endif else if max( domain_label eq [ 'atmos-ocean', 'ocean' ] ) eq 1 then begin
  temp_region_def = 'ISO-0=none'
endif else begin
  stop
endelse
mask_landsea_data = ipccar6dna_region_mask( region_def=temp_region_def, $
    lon=mask_lon_data, lat=mask_lat_data )
id = where( mask_landsea_data lt 0, n_id )
if n_id gt 0 then mask_landsea_data[id] = 0

; Load region mask
if keyword_set( region_def ) then begin
  mask_region_data = ipccar6dna_region_mask( region_def=region_def, $
        lon=mask_lon_data, lat=mask_lat_data )
  id = where( finite( mask_region_data ) eq 0, n_id )
  if n_id gt 0 then mask_region_data[id] = 0.
  id = where( mask_landsea_data lt 0, n_id )
  if n_id gt 0 then mask_landsea_data[id] = 0
; Or interpolate inputed region mask
endif else if keyword_set( region_data ) then begin
  if not( keyword_set( region_lon ) ) then stop
  if not( keyword_set( region_lat ) ) then stop
  mask_region_data = region_data
  temp_lon = region_lon
  temp_lat = region_lat
  check = 0
  if n_elements( temp_lon ) ne n_elements( mask_lon_data ) then begin
    check = 1
  endif else if n_elements( temp_lat ) ne n_elements( mask_lat_data ) then begin
    check = 1
  endif else begin
    d_temp_lon = temp_lon[1] - temp_lon[0]
    d_temp_lat = temp_lat[1] - temp_lat[0]
    if max( abs( temp_lon - mask_lon_data ) ) gt d_temp_lon / 100. then begin
      check = 1
    endif else if max( abs( temp_lat - mask_lat_data ) ) gt d_temp_lat / 100. $
        then begin
      check = 1
    endif
  endelse
  if check eq 1 then begin
    mask_lonlattime, mask_region_data, lon=temp_lon, lat=temp_lat, $
        mask_lon=mask_lon_data, mask_lat=mask_lat_data
  endif  
endif else begin
  stop
endelse
; Determine dimension resolution
d_mask_lon = abs( mask_lon_data[2] - mask_lon_data[1] )
if abs( d_mask_lon ) gt 180 then begin
  d_mask_lon = mask_lon_data[3] - mask_lon_data[2]
endif
d_mask_lat = abs( mask_lat_data[2] - mask_lat_data[1] )

; Combine masks
mask_all_data = mask_landsea_data * mask_region_data

;; Produce mask area array
;mask_area_data = fltarr( n_mask_lon, n_mask_lat )
;for i_lat = 0, n_mask_lat - 1 do begin
;  mask_area_data[i_lon,i_lat] = cos( mask_lat_data[i_lat] * degrad )
;endfor
;mask_area_data = mask_area_data / total( mask_area_data ) * 4. * !pi $
;    * r_earth ^ 2.

; Load the measurement count data
if nobs_var_label eq 'nstation' then begin
  nobs_var_data = netcdf_read_geo_multitime( nobs_file, 'nstation', $
      lon=nobs_lon_data, lat=nobs_lat_data, time=nobs_time_data, $
      period=period, quiet=1 )
  nobs_var_data = reform( nobs_var_data )
  n_nobs_time = n_elements( nobs_time_data )
  d_nobs_lon = abs( nobs_lon_data[2] - nobs_lon_data[1] )
  if abs( d_nobs_lon ) gt 180 then begin
    d_nobs_lon = nobs_lon_data[3] - nobs_lon_data[2]
  endif
  d_nobs_lat = abs( nobs_lat_data[2] - nobs_lat_data[1] )
  id = where( finite( nobs_var_data ) eq 0, n_id )
  if n_id gt 0 then nobs_var_data[id] = 0
  id = where( nobs_lon_data gt 180., n_id )
  if n_id gt 0 then nobs_lon_data[id] = nobs_lon_data[id] - 360.
  ; Convert ship-borne measurements to effective station counts
  ; (following Jones et alii (1997))
  if domain_label eq 'ocean' then begin
    nobs_var_data = nobs_var_data / 5.
    id = where( nobs_var_data gt n_stat_cell_max, n_id )
    if n_id ne 0 then nobs_var_data[id] = n_stat_cell_max
  endif
; Load station activity data
endif else if nobs_var_label eq 'active' then begin
  stat_active = netcdf_read_geo_multitime( nobs_file, 'active', $
      time=nobs_time_data, period=period, label_in_lon='stationid,station', $
      label_in_lat=' ', no_lat_sort=1, quiet=1 )
  temp_nobs_file = strsplit( nobs_file, ',', extract=1 )
  stat_lon_data = netcdf_read_geo( temp_nobs_file[0], 'lon', $
      label_in_lon='lon,station', label_in_lat=' ', no_lat_sort=1, quiet=1 )
  stat_lon_data = reform( stat_lon_data )
  id = where( stat_lon_data gt 180., n_id )
  if n_id gt 0 then stat_lon_data[id] = stat_lon_data[id] - 360.
  stat_lat_data = netcdf_read_geo( temp_nobs_file[0], 'lat', $
      label_in_lat='lat,station', label_in_lon=' ', no_lat_sort=1, quiet=1 )
  stat_lat_data = reform( stat_lat_data )
  n_stat = n_elements( stat_lon_data )
  n_nobs_time = n_elements( nobs_time_data )
  stat_active = transpose( reform( stat_active, n_stat, n_nobs_time ) )
endif else begin
  stop
endelse

; Extract area including and around region.
; (This helps with computation as because stations further than dist_use_max 
; away are not used anyway.)
; Interpolate region mask to station count array grid
; (needed in order to ensure resolution difference is an integer)
temp_mask_data = mask_region_data
temp_lon_data = mask_lon_data
temp_lat_data = mask_lat_data
if nobs_var_label eq 'nstation' then begin
  mask_lonlattime, temp_mask_data, lon=temp_lon_data, lat=temp_lat_data, $
        mask_lon=nobs_lon_data, mask_lat=nobs_lat_data
endif
; Extract area including region
id = where( temp_mask_data eq 0, n_id )
temp_mask_data[id] = !values.f_nan
temp_mask_data = extract_region( temp_mask_data, lon=temp_lon_data, $
    lat=temp_lat_data )
temp_mask_data = 0
n_temp_lon = n_elements( temp_lon_data )
n_temp_lat = n_elements( temp_lat_data )
id = where( temp_lon_data gt 180., n_id )
if n_id gt 0 then temp_lon_data[id] = temp_lon_data[id] - 360.
; Determine coordinates of area
mask_limit = [ temp_lon_data[0], temp_lat_data[0], $
    temp_lon_data[n_temp_lon-1], temp_lat_data[n_temp_lat-1] ]
; Inflate area with a (slightly larger than) DIST_USE_MAX buffer
temp_dist_0 = 1.1 * dist_use_max / ( !pi * r_earth ) * 180.
if mask_limit[1] - temp_dist_0 le -90. then begin
  mask_limit[[0,2]] = ( mask_limit[0] + mask_limit[2] ) / 2. + [ -180., 180. ]
  mask_limit[1] = -90.
  mask_limit[3] = mask_limit[3] + temp_dist_0
endif else if mask_limit[1] + temp_dist_0 ge 90. then begin
  mask_limit[[0,2]] = ( mask_limit[0] + mask_limit[2] ) / 2. + [ -180., 180. ]
  mask_limit[1] = mask_limit[1] - temp_dist_0
  mask_limit[3] = 90.
endif else begin
  mask_limit[0] = mask_limit[0] - temp_dist_0 $
      / max( cos( mask_limit[[1,3]] / 180. * !pi ) )
  mask_limit[2] = mask_limit[2] + temp_dist_0 $
      / max( cos( mask_limit[[1,3]] / 180. * !pi ) )
  mask_limit[1] = mask_limit[1] - temp_dist_0
  mask_limit[3] = mask_limit[3] + temp_dist_0
endelse
; Extract area from station density array
if nobs_var_label eq 'nstation' then begin
  nobs_var_data = extract_region( nobs_var_data, lon=nobs_lon_data, $
      lat=nobs_lat_data, region=mask_limit )
  n_nobs_lon = n_elements( nobs_lon_data )
  n_nobs_lat = n_elements( nobs_lat_data )
  id = where( nobs_lon_data gt 180., n_id )
  if n_id gt 0 then nobs_lon_data[id] = nobs_lon_data[id] - 360.
; Extract stations
endif else if nobs_var_label eq 'active' then begin
  ; Determine which stations are within our area
  temp_lon = stat_lon_data - mask_limit[0]
  id = where( temp_lon lt 0, n_id )
  if n_id gt 0 then temp_lon[id] = temp_lon[id] + 360.
  id = where( temp_lon ge 360, n_id )
  if n_id gt 0 then temp_lon[id] = temp_lon[id] - 360.
  if mask_limit[2] gt mask_limit[0] then begin
    id = where( ( temp_lon ge 0 ) $
        and ( temp_lon le mask_limit[2] - mask_limit[0] ) $
        and ( stat_lat_data ge mask_limit[1] ) $
        and ( stat_lat_data le mask_limit[3] ), n_stat )
  endif else begin
    id = where( ( temp_lon ge 0 ) $
        and ( temp_lon le mask_limit[2] + 360 - mask_limit[0] ) $
        and ( stat_lat_data ge mask_limit[1] ) $
        and ( stat_lat_data le mask_limit[3] ), n_stat )
  endelse
  ; If there are no stations then return zero confidence factor
  if n_stat eq 0 then return, 0.
  ; Keep just these stations
  stat_active = stat_active[*,id]
  stat_lon_data = stat_lon_data[id]
  stat_lat_data = stat_lat_data[id]
endif else begin
  stop
endelse

; Adjust MASK_LIMIT to ensure extracted area from mask arrays fits extracted 
; station data area
if nobs_var_label eq 'nstation' then begin
  mask_limit = [ nobs_lon_data[0] - d_nobs_lon / 2. + d_mask_lon / 20., $
      nobs_lat_data[0] - d_nobs_lat / 2. + d_mask_lat / 20., $
      nobs_lon_data[n_nobs_lon-1] + d_nobs_lon / 2. - d_mask_lon / 20., $
      nobs_lat_data[n_nobs_lat-1] + d_nobs_lat / 2. - d_mask_lat / 20. ]
endif
; Extract area from mask arrays
temp_mask_limit = mask_limit
if temp_mask_limit[0] eq temp_mask_limit[2] - 360. then begin
  temp_mask_limit[2] = temp_mask_limit[2] - d_mask_lon / 2.
endif
temp_lon_data = mask_lon_data
temp_lat_data = mask_lat_data
mask_landsea_data = extract_region( mask_landsea_data, lon=temp_lon_data, $
    lat=temp_lat_data, region=temp_mask_limit )
;temp_lon_data = mask_lon_data
;temp_lat_data = mask_lat_data
;mask_area_data = extract_region( mask_area_data, lon=temp_lon_data, $
;    lat=temp_lat_data, region=mask_limit )
mask_all_data = extract_region( mask_all_data, lon=mask_lon_data, $
    lat=mask_lat_data, region=temp_mask_limit )
n_mask_lon = n_elements( mask_lon_data )
n_mask_lat = n_elements( mask_lat_data )
id = where( mask_lon_data gt 180., n_id )
if n_id gt 0 then mask_lon_data[id] = mask_lon_data[id] - 360.
; This code only works when the mask array is of at least as high resolution 
; as the station count array and is an integer multiple of its resolution
if nobs_var_label eq 'nstation' then begin
  if n_mask_lon lt n_nobs_lon then stop
  if ( n_mask_lon mod n_nobs_lon ) ne 0 then stop
  if n_mask_lat lt n_nobs_lat then stop
  if ( n_mask_lat mod n_nobs_lat ) ne 0 then stop
endif

; Reduce resolution of mask grid if appropriate.
if nobs_var_label eq 'nstation' then begin
  ; Determine if it is appropriate to reduce the longitude resolution
  if n_mask_lon / n_nobs_lon ge 2 then begin
    ; If the station count array resolution is greater than the threshold 
    ; resolution
    if n_nobs_lon gt n_mask_lon_0 then begin
      ; Degrade fully to n_nobs_lon
      mask_lon_factor = n_mask_lon / n_nobs_lon
    ; If the station count array resolution is less than the threshold 
    ; resolution
    endif else begin
      ; Determine common factors in the resolution ratios
      temp_factor = float( n_mask_lon_0 ) / float( n_nobs_lon )
      temp_factors = factors( n_mask_lon / n_nobs_lon )
      id = where( temp_factors ge temp_factor, n_id )
      ; If there does not seem to be scope for reduction
      if n_id eq 0 then begin
        mask_lon_factor = 1
      ; If there does seem to be scope for reduction
      endif else begin
        ; Determine degradation factor
        id_1 = where( temp_factors eq min( temp_factors[id] ) )
        id_1 = id_1[0]
        temp_factors[id_1] = 1
        mask_lon_factor = product( temp_factors, integer=1 ) 
      endelse
    endelse
  endif
  ; Determine if it is appropriate to reduce the latitude resolution
  if n_mask_lat / n_nobs_lat ge 2 then begin
    ; If the station count array resolution is greater than the threshold 
    ; resolution
    if n_nobs_lat gt n_mask_lat_0 then begin
      ; Degrade fully to n_nobs_lat
      mask_lat_factor = n_mask_lat / n_nobs_lat
    ; If the station count array resolution is less than the threshold 
    ; resolution
    endif else begin
      ; Determine common factors in the resolution ratios
      temp_factor = float( n_mask_lat_0 ) / float( n_nobs_lat )
      temp_factors = factors( n_mask_lat / n_nobs_lat )
      id = where( temp_factors ge temp_factor, n_id )
      ; If there does not seem to be scope for reduction
      if n_id eq 0 then begin
        mask_lat_factor = 1
      ; If there does seem to be scope for reduction
      endif else begin
        ; Determine degradation factor
        id_1 = where( temp_factors eq min( temp_factors[id] ) )
        id_1 = id_1[0]
        temp_factors[id_1] = 1
        mask_lat_factor = product( temp_factors, integer=1 ) 
      endelse
    endelse
  endif
endif else if nobs_var_label eq 'active' then begin
  ; For this case we only need to worry about the mask grid
  mask_lon_factor = floor( float( n_mask_lon ) / float( n_mask_lon_0 ) )
  mask_lat_factor = floor( float( n_mask_lat ) / float( n_mask_lat_0 ) )
endif else begin
  stop
endelse
; If we can degrade the resolution
if max( [ mask_lon_factor, mask_lat_factor ] ) gt 1 then begin
  ; Determine new mask dimensions
  n_mask_lon_new = n_mask_lon / mask_lon_factor
  n_mask_lat_new = n_mask_lat / mask_lat_factor
  ; Iterate through new latitude value
  for i_lat = 0, n_mask_lat_new - 1 do begin
    ; Identify old latitude values within this new latitude value
    id_lat = [ i_lat, i_lat + 1 ] * mask_lat_factor - [ 0, 1 ]
    ; Extract this latitude band
    temp_mask_landsea_data = mask_landsea_data[*,id_lat[0]:id_lat[1]]
    temp_mask_all_data = mask_all_data[*,id_lat[0]:id_lat[1]]
    ;temp_mask_area_data = mask_area_data[*,id_lat[0]:id_lat[1]]
    ; Iterate through new longitude values
    for i_lon = 0, n_mask_lon_new - 1 do begin
      ; Identify old longitude values within this new longitude value
      id_lon = [ i_lon, i_lon + 1 ] * mask_lon_factor - [ 0, 1 ]
      ; Take sum of old values
      temp = temp_mask_landsea_data[id_lon[0]:id_lon[1],*]
      mask_landsea_data[i_lon,i_lat] = mean( temp )
      temp = temp_mask_all_data[id_lon[0]:id_lon[1],*]
      mask_all_data[i_lon,i_lat] = mean( temp )
      ;temp = temp_mask_area_data[id_lon[0]:id_lon[1],*]
      ;mask_area_data[i_lon,i_lat] = total( temp )
    endfor
  endfor
  ; Extract new mask arrays
  d_mask_lon = d_mask_lon * mask_lon_factor
  n_mask_lon = n_mask_lon_new
  d_mask_lat = d_mask_lat * mask_lat_factor
  n_mask_lat = n_mask_lat_new
  mask_landsea_data = mask_landsea_data[*,0:n_mask_lat-1]
  mask_landsea_data = mask_landsea_data[0:n_mask_lon-1,*]
  mask_all_data = mask_all_data[*,0:n_mask_lat-1]
  mask_all_data = mask_all_data[0:n_mask_lon-1,*]
  ;mask_area_data = mask_area_data[*,0:n_mask_lat-1]
  ;mask_area_data = mask_area_data[0:n_mask_lon-1,*]
  ; Determine new latitude values
  for i_lat = 0, n_mask_lat - 1 do begin
    mask_lat_data[i_lat] = mean( $
        mask_lat_data[i_lat*mask_lat_factor:(i_lat+1)*mask_lat_factor-1] )
  endfor
  mask_lat_data = mask_lat_data[0:n_mask_lat-1]
  ; Determine new longitude values
  for i_lon = 0, n_mask_lon - 1 do begin
    temp = mask_lon_data[i_lon*mask_lon_factor:(i_lon+1)*mask_lon_factor-1]
    if temp[mask_lon_factor-1] gt temp[0] then begin
      mask_lon_data[i_lon] = mean( temp )
    endif else begin
      if i_lon eq 0 then begin
        id_lon = [ i_lon + 1, i_lon + 2 ] * mask_lon_factor - [ 0, 1 ]
        mask_lon_data[i_lon] = mean( mask_lon_data[id_lon[0]:id_lon[1]] ) $
            - d_mask_lon
      endif else begin
        mask_lon_data[i_lon] = mask_lon_data[i_lon-1] + d_mask_lon
      endelse
      if mask_lon_data[i_lon] lt 0 then begin
        mask_lon_data[i_lon] = mask_lon_data[i_lon] + 360.
      endif
      if mask_lon_data[i_lon] ge 360 then begin
        mask_lon_data[i_lon] = mask_lon_data[i_lon] - 360.
      endif
    endelse
  endfor
  mask_lon_data = mask_lon_data[0:n_mask_lon-1]
endif

;; The total area within the larger region
;mask_area_tot = total( mask_area_data )

;; Generate low (nobs_var_data) resolution version of region mask (for 
;; computational efficiency as a "first look")
;nobs_mask_all_data = fltarr( n_nobs_lon, n_nobs_lat )
;temp_lon_factor = n_mask_lon / n_nobs_lon
;temp_lat_factor = n_mask_lat / n_nobs_lat
;for i_lat = 0, n_nobs_lat - 1 do begin
;  id_lat = [ i_lat, i_lat + 1] * temp_lat_factor - [ 0, 1 ]
;  temp_mask = mask_all_data[*,id_lat[0]:id_lat[1]]
;  for i_lon = 0, n_nobs_lon - 1 do begin
;    id_lon = [ i_lon, i_lon + 1] * temp_lon_factor - [ 0, 1 ]
;    nobs_mask_all_data[i_lon,i_lat] = mean( temp_mask[id_lon[0]:id_lon[1],*] )
;  endfor
;endfor

; Identify time steps within the season
id_season_start = season_id - ( season_len - 1 ) / 2
if id_season_start lt 0 then id_season_start = id_season_start + mina
id_season_end = season_id + season_len / 2
if id_season_end gt mina - 1 then id_season_end = id_season_end - mina
time_flag_in_season = indgen( n_nobs_time )
if id_season_end gt id_season_start then begin
  id = where( ( ( time_flag_in_season mod mina ) ge id_season_start ) $
      and ( ( time_flag_in_season mod mina ) le id_season_end ) )
endif else begin
  id = where( ( ( time_flag_in_season mod mina ) ge id_season_start ) $
      or ( ( time_flag_in_season mod mina ) le id_season_end ) )
endelse
time_flag_in_season[*] = 0
time_flag_in_season[id] = 1
id_time_in_season = where( time_flag_in_season eq 1, n_time_in_season )
if n_time_in_season eq 0 then stop

; Remove technically inactive stations
if nobs_var_label eq 'active' then begin
  ; Determine active fraction
  temp = total( stat_active, 1 ) / n_time_in_season
  if keyword_set( frac_active_thresh ) then begin
    id = where( temp ge frac_active_thresh, n_stat )
  endif else begin
    id = where( temp gt 0., n_stat )
  endelse
  ; If there are no stations then return zero confidence factor
  if n_stat eq 0 then return, 0.
  ; Keep just these stations
  stat_active = stat_active[*,id]
  stat_lon_data = stat_lon_data[id]
  stat_lat_data = stat_lat_data[id]
endif

;***********************************************************************
; Estimate effective station coverage

; Return with zero if there are no stations
if nobs_var_label eq 'nstation' then begin
  if max( nobs_var_data ) eq 0 then return, 0.
endif

; Initialise station variables if needed
if nobs_var_label eq 'nstation' then begin
  ; Overestimate the total number of stations
  n_stat = total( $
      reform( nobs_var_data, n_nobs_lon * n_nobs_lat, n_nobs_time ), $
      1, integer=1 )
  n_stat = max( n_stat )
  ; Initialise array of possible station locations and measurement counts
  stat_lon_data = fltarr( n_stat )
  stat_lat_data = fltarr( n_stat )
  stat_active = intarr( n_nobs_time, n_stat )
  ctr_stat = 0l
endif

; Prepare for defining random cell positions within the landsea_data cells 
; within the nobs_data cell
if nobs_var_label eq 'nstation' then begin
  ; (Note this will be converted into cell IDs once we know how many available 
  ; mask_landsea_data cells there are within a nobs_var_data cell)
  stat_pos_random = 0.999 * randomu( seed, n_stat )
  ; Random position deltas within a landsea_data cell
  stat_delta_lon = ( randomu( seed, n_stat ) - 0.5 ) * d_mask_lon
  stat_delta_lat = ( randomu( seed, n_stat ) - 0.5 ) * d_mask_lat
endif

; Determine the index number for the quantile of the station activity to take
if keyword_set( frac_active_thresh ) then begin
  id_frac_active_thresh = floor( n_nobs_time * ( 1.0 - frac_active_thresh ) )
endif

; Assign gridded density to station locations
if nobs_var_label eq 'nstation' then begin
  ; Iterate through latitude of the nobs_var_data array
  for i_lat = 0, n_nobs_lat - 1 do begin
    ; Determine the latitudes from mask_landsea_data within this latitude band
    id_lat = where( $
        ( mask_lat_data ge nobs_lat_data[i_lat] - d_nobs_lat / 2. ) $
        and ( mask_lat_data lt nobs_lat_data[i_lat] + d_nobs_lat / 2. ), $
        n_id_lat )
    ; Iterate through longitude of the nobs_var_data array
    for i_lon = 0, n_nobs_lon - 1 do begin
      ; Determine the longitudes from mask_landsea_data within this longitude 
      ; band
      id_lon = where( $
          ( mask_lon_data ge nobs_lon_data[i_lon] - d_nobs_lon / 2. ) $
          and ( mask_lon_data lt nobs_lon_data[i_lon] + d_nobs_lon / 2. ), $
          n_id_lon )
      ; Extract the land-sea mask data for this nobs_var_data cell
      temp_mask = mask_landsea_data[*,id_lat]
      temp_mask = temp_mask[id_lon,*]
      id_mask = where( temp_mask ge 0.05, n_id_mask )
      ; If there is some mask area
      if n_id_mask gt 0 then begin
        ; Determine the effective number of stations within this nobs_var_data 
        ; cell (assume that stations do not move, only turn on and off)
        temp_nobs_var_data = reform( nobs_var_data[i_lon,i_lat,*] )
        if max( finite( temp_nobs_var_data ) ) eq 0 then begin
          n_stat_cell = 0
        endif else begin
          if keyword_set( frac_active_thresh ) then begin
            id = where( finite( temp_nobs_var_data ) eq 0, n_id )
            if n_id gt 0 then temp_nobs_var_data[id] = 0
            id = sort( temp_nobs_var_data )
            n_stat_cell = temp_nobs_var_data[id[id_frac_active_thresh]]
          endif else begin
            n_stat_cell = max( temp_nobs_var_data, nan=1 )
          endelse
        endelse
        ; Iterate through stations within this nobs_var_data cell
        ; (assume that stations do not move, only turn on and off)
        for i_stat = 0, n_stat_cell - 1 do begin
          ; Select the mask_landsea_data cell for this station
          id = floor( stat_pos_random[ctr_stat] * n_id_mask )
          id_stat_mask = id_mask[id]
          ; Determine the coordinates of this cell and add random deltas
          id_stat_lon = id_stat_mask mod n_id_lon
          stat_lon_data[ctr_stat] = mask_lon_data[id_lon[id_stat_lon]] $
              + stat_delta_lon[ctr_stat]
          id_stat_lat = id_stat_mask / n_id_lon
          stat_lat_data[ctr_stat] = mask_lat_data[id_lat[id_stat_lat]] $
              + stat_delta_lat[ctr_stat]
          ; Record the station activity
          id = where( temp_nobs_var_data ge n_stat_cell - i_stat, n_id )
          if n_id eq 0 then stop
          stat_active[id,ctr_stat] = 1
          ; Increment station counter
          ctr_stat = ctr_stat + 1l
        endfor
      endif
    endfor
  endfor
  ; Clear memory
  id_mask = 0
  ; Remove (effectively) nonexistant stations from our list
  stat_lon_data = stat_lon_data[0l:ctr_stat-1l]
  stat_lat_data = stat_lat_data[0l:ctr_stat-1l]
  stat_active = stat_active[*,0l:ctr_stat-1l]
  n_stat = ctr_stat
endif

; The threshold for considering this to be a grid cell within the region.
; This takes the cell coverage that is half the maximum individual cell 
; coverage in the region, ensuring that regions that are diffuse across cells 
; are included.
thresh_mask_cell = max( mask_all_data ) / 2.
if thresh_mask_cell eq 0 then stop

; Determine the geographic coordinates of cells within the mask
id_mask_all = where( mask_all_data gt thresh_mask_cell, n_id_mask_all )
if n_id_mask_all eq 0 then stop

; If we have a sufficient number of grid cells within the region to consider 
; reducing the resolution
;n_id_mask_min = 100l ^ 2
n_id_mask_min = 50l ^ 2
if n_id_mask_all ge n_id_mask_min * 2 then begin
  ; Factor the longitude and latitude dimensions
  lon_factor = factors( n_mask_lon )
  lat_factor = factors( n_mask_lat )
  ; Flag that we should be trying to degrade
  flag_degrade = 0
  ; Iterate until we are told to exit
  while flag_degrade eq 0 do begin
    ; Determine whether the longitude or latitude dimension is larger
    flag_lon = n_mask_lon ge n_mask_lat
    if max( lon_factor ) eq 1 then flag_lon = 0
    flag_lat = 1 - flag_lon
    if max( lat_factor ) eq 1 then begin
      flag_lat = 0
      if max( lon_factor ) ne 1 then flag_lon = 1
    endif
    ; If the longitude dimension is larger then work on it
    if flag_lon eq 1 then begin
      ; If it cannot be factored further (including if we have tried but all 
      ; of the factors make the grid cell count too small)
      if max( lon_factor eq [ 1, n_mask_lon ] ) eq 1 then begin
        ; Switch to try the latitude dimension
        flag_lat = 1
        lon_factor = 1
      ; If it can be factored further
      endif else begin
        ;; Take the largest remaining factor
        ;temp_factor = max( lon_factor )
        ; Take the smallest remaining factor
        temp_factor = min( lon_factor )
        ; Adopt a smaller dimension determined from this factor
        n_mask_lon_new = n_mask_lon / temp_factor
        mask_all_data_new = fltarr( n_mask_lon_new, n_mask_lat )
        mask_lon_data_new = fltarr( n_mask_lon_new )
        ; Interpolate to lower resolution
        for i_lon = 0, n_mask_lon_new - 1 do begin
          id_lon = [ i_lon, i_lon + 1 ] * temp_factor + [ 0, -1 ]
          mask_all_data_new[i_lon,*] = mean( $
              mask_all_data[id_lon[0]:id_lon[1],*], dimension=1 )
          mask_lon_data_new[i_lon] = mean( mask_lon_data[id_lon[0]:id_lon[1]] )
        endfor
        ; If this lower resolution satisfies the limit on grid cell count
        id = where( mask_all_data_new gt thresh_mask_cell, n_id )
        if n_id gt n_id_mask_min then begin
          ; Adopt the lower resolution dimension
          mask_all_data = temporary( mask_all_data_new )
          mask_lon_data = temporary( mask_lon_data_new )
          n_mask_lon = n_mask_lon_new
          d_mask_lon = d_mask_lon * temp_factor
          id_mask_all = temporary( id )
          n_id_mask_all = n_id
          id = where( lon_factor eq temp_factor )
          lon_factor[id[0]] = -1
        ; If this lower resolution does not satisfy the limit
        endif else begin
          mask_all_data_new = 0
          mask_lon_data_new = 0
          id = where( lon_factor eq temp_factor )
          lon_factor[id] = -1
        endelse
        ; Remove this factor
        id = where( lon_factor ne -1, n_id )
        if n_id gt 0 then begin
          lon_factor = lon_factor[id]
        endif else begin
          lon_factor = 1
        endelse
      endelse
    endif
    ; If the latitude dimension is larger (or the longitude dimension cannot be 
    ; reduced further) then work on it
    if flag_lat eq 1 then begin
      ; If it cannot be factored further (including if we have tried but all 
      ; of the factors make the grid cell count too small)
      if max( lat_factor eq [ 1, n_mask_lat ] ) eq 1 then begin
        lat_factor = 1
      ; If it can be factored further
      endif else begin
        ;; Take the largest remaining factor
        ;temp_factor = max( lat_factor )
        ; Take the smallest remaining factor
        temp_factor = min( lat_factor )
        ; Adopt a smaller dimension determined from this factor
        n_mask_lat_new = n_mask_lat / temp_factor
        mask_all_data_new = fltarr( n_mask_lon, n_mask_lat_new )
        mask_lat_data_new = fltarr( n_mask_lat_new )
        ; Interpolate to lower resolution
        for i_lat = 0, n_mask_lat_new - 1 do begin
          id_lat = [ i_lat, i_lat + 1 ] * temp_factor + [ 0, -1 ]
          mask_all_data_new[*,i_lat] = mean( $
              mask_all_data[*,id_lat[0]:id_lat[1]], dimension=2 )
          mask_lat_data_new[i_lat] = mean( mask_lat_data[id_lat[0]:id_lat[1]] )
        endfor
        ; If this lower resolution satisfies the limit on grid cell count
        id = where( mask_all_data_new gt thresh_mask_cell, n_id )
        if n_id gt n_id_mask_min then begin
          ; Adopt the lower resolution dimension
          mask_all_data = temporary( mask_all_data_new )
          mask_lat_data = temporary( mask_lat_data_new )
          n_mask_lat = n_mask_lat_new
          d_mask_lat = d_mask_lat * temp_factor
          id_mask_all = temporary( id )
          n_id_mask_all = n_id
          id = where( lat_factor eq temp_factor )
          lat_factor[id[0]] = -1
        ; If this lower resolution does not satisfy the limit
        endif else begin
          mask_all_data_new = 0
          mask_lat_data_new = 0
          id = where( lat_factor eq temp_factor )
          lat_factor[id] = -1
        endelse
        ; Remove this factor
        id = where( lat_factor ne -1, n_id )
        if n_id gt 0 then begin
          lat_factor = lat_factor[id]
        endif else begin
          lat_factor = 1
        endelse
      endelse
    endif
    ; If we cannot factor further
    if max( [ lon_factor, lat_factor ] ) eq 1 then begin
      flag_degrade = 1
    endif
  endwhile
endif

; Determine the minimum grid cell size in km
d_mask_lat_km = d_mask_lat * degrad * r_earth
d_mask_lon_km = d_mask_lon * min( cos( mask_lat_data[id_mask_all] * degrad ) ) $
    * degrad * r_earth

diag_time_1 = systime( /seconds )
print, '...density b', diag_time_1-diag_time_0
; Initialise array of variance accounted for by the given stations
var_accounted = fltarr( n_id_mask_all, n_time_in_season )

; Iterate through region mask grid cells
for i_mask = 0l, n_id_mask_all - 1l do begin
  id_lon = id_mask_all[i_mask] mod n_mask_lon
  id_lat = id_mask_all[i_mask] / n_mask_lon
  ; Determine whether this cell is in the region
  if max( mask_all_data[id_lon,id_lat] ) gt 0 then begin
    ; Identify stations within range of this cell
    ; (efficient method for large regions)
    if ( d_mask_lon_km * n_mask_lon gt 2.5 * dist_use_max ) $
        or ( d_mask_lat_km * n_mask_lat gt 2.5 * dist_use_max ) then begin
      temp_lon = abs( stat_lon_data - mask_lon_data[id_lon] ) / d_mask_lon $
          * d_mask_lon_km
      temp_lat = abs( stat_lat_data - mask_lat_data[id_lat] ) / d_mask_lat $
          * d_mask_lat_km
      id_stat_1 = where( ( temp_lon le dist_use_max ) $
          and ( temp_lat le dist_use_max ), n_id_stat_1 )
      if n_id_stat_1 gt 0 then begin
        dist_stat = geo_dist( mask_lon_data[id_lon], mask_lat_data[id_lat], $
            stat_lon_data[id_stat_1], stat_lat_data[id_stat_1] )
        id_stat = where( dist_stat le dist_use_max, n_id_stat )
        if n_id_stat gt 0 then begin
          dist_stat = dist_stat[id_stat]
          id_stat = id_stat_1[id_stat]
        endif
      endif else begin
        n_id_stat = 0
      endelse
    ; (efficient method for small regions)
    endif else begin
      dist_stat = geo_dist( mask_lon_data[id_lon], mask_lat_data[id_lat], $
          stat_lon_data, stat_lat_data )
      id_stat = where( dist_stat le dist_use_max, n_id_stat )
      if n_id_stat gt 0 then dist_stat = dist_stat[id_stat]
    endelse
    ; If we have stations within range
    if n_id_stat gt 0 then begin
      ; Estimate the fractional variance shared between this cell and the 
      ; stations within range
      var_frac_shared = exp( -2. * dist_stat / dist_decorr )
      ; Iterate through time
      for i_time = 0, n_time_in_season - 1 do begin
        id_time = id_time_in_season[i_time]
        ; Identify active stations
        id_active = where( stat_active[id_time,id_stat] eq 1, n_id_active )
        if n_id_active gt 0 then begin
          ; Estimate the fractional variance shared between this cell and the 
          ; stations within range, accounting for the variance shared between 
          ; the stations
          ; (this assumes that the stations are randomly placed but we do not 
          ; know where, i.e. in contradiction of the fact that we have placed 
          ; them)
          ; (note this gets normalised for time later)
          var_accounted[i_mask,i_time] = 1. - $
              product( 1. - var_frac_shared[id_active] )
        endif
      endfor
    endif
  endif
endfor

diag_time_2 = systime( /seconds )
print, '...density c', diag_time_2-diag_time_1
; Calculate area weightings for the grid cells
; (double precision noticeably improves numerical accuracy)
id_lat = id_mask_all / n_mask_lon
mask_weight_data = cos( !dpi / 180.d * mask_lat_data[id_mask_all] )
mask_weight_data = mask_weight_data * mask_all_data[id_mask_all]
mask_weight_data = mask_weight_data / total( mask_weight_data )
; Add up all variance in space and average in time to estimate the fraction 
; of the variance for this region sampled by available stations
; (Replaced commented code with more memory-efficient code
;region_factor $
;    = total( add_dim( mask_weight_data, 1, n_nobs_time ) * var_accounted ) $
;    / total( mask_weight_data ) / n_time_in_season
region_factor = 0.
for i_time = 0, n_time_in_season - 1 do begin
  region_factor = region_factor $
      + total( mask_weight_data * var_accounted[*,i_time] )
endfor
region_factor = region_factor / total( mask_weight_data ) / n_time_in_season
mask_weight_data = 0
var_accounted = 0

;***********************************************************************
; Repeat calculation for small regions.
; If the region is small, this calculation can be sensitive to the random seed 
; used when there are few stations, so it is good to repeat and take the middle 
; value

; Determine the area
region_area = total( mask_all_data ) * 4. * !pi * ( 6370.95 ^ 2. )

; Repeat calculation more times for small region
if region_area lt 25.e4 then begin
  ; The total number of calculations
  n_calc = 3
  ; Adopt existing calculation
  region_factor = [ region_factor, fltarr( n_calc - 1 ) ]
  ; Add more calculations
  for i_calc = 0, n_calc - 2 do begin
    region_factor[1+i_calc] = ipccar6dna_conf_density( $
        domain_label=domain_label, frac_active_thresh=frac_active_thresh, $
        period=period, region_def=region_def, season_id=season_id, $
        season_len=season_len, seed=seed, var_label=var_label )
  endfor
  ; Take the median value
  region_factor = median( region_factor )
endif

; A numerical correction
if region_factor gt 1. then region_factor = 1.
; Return to single float precision
region_factor = float( region_factor )

;***********************************************************************
; The end

;stop
return, region_factor
END
