diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml new file mode 100644 index 0000000000..812e750d09 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml @@ -0,0 +1,47 @@ +--- +# Author: 2026-01-29 Kristi Webb + +# Common global attributes for Cmorizer output +attributes: + + project_id: CMIP6 + dataset_id: MODIS-Aqua + modeling_realm: atmos + type: sat + tier: 3 + version: MYD08_M3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + reference: modis1 + + filename_fmt: "MYD08_M3.A*.hdf" + +variables: + clwvi: + raw_name: Cloud_Water_Path_Liquid_Mean_Mean + mip: Amon + frequency: mon + clivi: + raw_name: Cloud_Water_Path_Ice_Mean_Mean + mip: Amon + frequency: mon + clt: + raw_name: Cloud_Fraction_Mean_Mean + mip: Amon + frequency: mon + lwpStderr: + raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty + mip: custom + frequency: mon + iwpStderr: + raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty + mip: custom + frequency: mon + od550aer: + raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + mip: AERmon + frequency: mon + reference: modis2 + reffclwtop: + raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean + mip: AERmon + frequency: mon diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml new file mode 100644 index 0000000000..e30f90835f --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml @@ -0,0 +1,47 @@ +--- +# Author: 2026-01-29 Kristi Webb + +# Common global attributes for Cmorizer output +attributes: + + project_id: CMIP6 + dataset_id: MODIS-Terra + modeling_realm: atmos + type: sat + tier: 3 + version: MOD08_M3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + reference: modis1 + + filename_fmt: "MOD08_M3.A*.hdf" + +variables: + clwvi: + raw_name: Cloud_Water_Path_Liquid_Mean_Mean + mip: Amon + frequency: mon + clivi: + raw_name: Cloud_Water_Path_Ice_Mean_Mean + mip: Amon + frequency: mon + clt: + raw_name: Cloud_Fraction_Mean_Mean + mip: Amon + frequency: mon + lwpStderr: + raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty + mip: custom + frequency: mon + iwpStderr: + raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty + mip: custom + frequency: mon + od550aer: + raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + mip: AERmon + frequency: mon + reference: modis2 + reffclwtop: + raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean + mip: AERmon + frequency: mon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 5997f13864..f0b69db7b0 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1082,10 +1082,10 @@ datasets: info: | Download the file MPI_MOBO-DIC_2004-2019_v2.nc - MODIS: + MODIS-Aqua: tier: 3 source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - last_access: 2019-02-09 + last_access: 2026-01-30 info: | In Products: select "MODIS Aqua", "Collection 6.1" and "L3 Atmosphere Product", click on MYD08_M3. @@ -1100,6 +1100,24 @@ datasets: grid-box averages --> multiply MODIS clwvi and clivi values with cloud fraction as a first guess + MODIS-Terra: + tier: 3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + last_access: 2026-01-30 + info: | + In Products: select "MODIS Terra", "Collection 6.1" and + "L3 Atmosphere Product", click on MOD08_M3. + In Time: select from 2000-01-01 to today. + In Location: skip, the global domain will be applied. + In Files: select all. + Submit the order. + A registration is required to download the data. + + Caveats + clwvi and clivi data are in-cloud values whereas CMIP5 models provide + grid-box averages --> multiply MODIS clwvi and clivi values with cloud + fraction as a first guess + MTE: tier: 3 source: http://www.bgc-jena.mpg.de/geodb/BGI/Home diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl new file mode 100644 index 0000000000..f1829bd6b6 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl @@ -0,0 +1,253 @@ +; ############################################################################# +; ESMValTool CMORizer for MODIS data +; ############################################################################# +; +; Tier +; Tier 3: restricted dataset. +; +; Source +; https://ladsweb.modaps.eosdis.nasa.gov/search/order +; +; Last access +; 20190209 +; +; Download and processing instructions +; In Products: select "MODIS Aqua", "Collection 6.1" and +; "L3 Atmosphere Product", click on MYD08_M3. +; In Time: select from 2000-01-01 to today. +; In Location: skip, the global domain will be applied. +; In Files: select all. +; Submit the order. +; A registration is required to download the data. +; +; Caveats +; clwvi and clivi data are in-cloud values whereas CMIP5 models provide +; grid-box averages --> multiply MODIS clwvi and clivi values with cloud +; fraction as a first guess +; +; Modification history +; 20180209-righi_mattia: fixed bug in lwpStderr. +; 20180209-hassler_birgit: adapted to v2. +; 20180810-righi_mattia: fix minor calendar issue. +; 20180806-righi_mattia: code cleaning. +; 20170116-lauer_axel: using cirrus fraction to gridbox averages. +; 20160408-lauer_axel: added processing of uncertainties. +; 20151118-lauer_axel: bugfix: added unit conversion. +; 20150430-evaldsson_martin: written. +; +; ############################################################################# +loadscript(getenv("esmvaltool_root") + \ + "/data/formatters/interface.ncl") + +begin + + ; Script name (for logger) + DIAG_SCRIPT = "modis_aqua.ncl" + + ; Source name + OBSNAME = "MODIS-Aqua" + + ; Tier + TIER = 3 + + ; Selected variable (standard name) + VAR = (/"clwvi", \ + "clivi", \ + "clt", \ + "lwpStderr", \ + "iwpStderr", \ + "od550aer", \ + "reffclwtop"/) + + ; Name in the raw data + NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ + "Cloud_Water_Path_Ice_Mean_Mean", \ + "Cloud_Fraction_Mean_Mean", \ + "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ + "Cloud_Water_Path_Ice_Mean_Uncertainty", \ + "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ + "Cloud_Effective_Radius_Liquid_Mean_Mean"/) + + ; MIP + MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) + + ; Frequency + FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) + + ; Version + VERSION = "MYD08_M3" + + ; CMOR table + CMOR_TABLE = getenv("cmor_tables") + \ + (/"/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/custom/CMOR_lwpStderr.dat", \ + "/custom/CMOR_iwpStderr.dat", \ + "/cmip5/Tables/CMIP5_aero", \ + "/cmip5/Tables/CMIP5_aero"/) + + ; Type + TYPE = "sat" + + ; Global attributes + SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" + REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ + "doi:10.1109/TGRS.2002.808301, 2003." + REF2 = "Levy et al., Atmos. Meas. Tech., " + \ + "doi:10.5194/amt-6-2989-2013, 2013." + COMMENT = "" + +end + +begin + + ; List of files + FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") + + do ff = 0, dimsizes(FILES) - 1 + + fin = addfile(FILES(ff), "r") + + ; Get time + infile = systemfunc("basename " + FILES(ff)) + date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) + year = toint(str_get_cols(tostring(date), 0, 3)) + month = toint(str_get_cols(tostring(date), 4, 5)) + dm = days_in_month(year, month) + + ; Loop over variables to fetch from input file + do vv = 0, dimsizes(VAR) - 1 + + invar = fin->$NAME(vv)$ + invar_fv = invar@_FillValue + invar_coords = invar + invar := tofloat(invar) + invar := where(invar.eq.tofloat(invar_fv), \ + default_fillvalue("float"), invar) + + ; Special case clwvi as the sum lwp + iwp + if (VAR(vv).eq."clwvi") then + if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then + error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") + end if + + ; Read cirrus fraction + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + ; liquid fraction is estimated assuming random overlap, i.e. + ; ctot = 1 - (1 - cif) * (1 - lif) + ; --> lif = 1 - (1 - ctot) / (1 - cif) + delete(cfin) + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cif.gt.0.999, cif@_FillValue, cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif = where(lif.lt.0, 0, lif) + tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path + tmpvar_fv = tmpvar@_FillValue + tmpvar := tofloat(tmpvar) + tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ + default_fillvalue("float"), \ + tmpvar) + tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg + invar = invar * lif ; convert lwp in-cloud value to grid-box avg + invar = invar + tmpvar ; clwvi = lwp + iwp + delete(tmpvar) + delete(lif) + delete(cif) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + ; lwp and iwp are in-cloud values + ; convert lwp/iwp to grid-box averages by multiplying with + ; average cloud fraction (not optimum but best we can do at the moment) + if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. VAR(vv))) then + + ; Read cirrus fraction (0-1) + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + if (VAR(vv).eq."lwpStderr") then + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cf.gt.0.999, cf@_FillValue, cf) + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf = where(cf.lt.0, 0, cf) + delete(cif) + delete(ctot) + end if + invar = invar * cf ; ; "grid-box average" lwp/iwp + delete(cf) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + invar@_FillValue = default_fillvalue("float") + copy_VarCoords(invar_coords, invar) + if (isatt(invar_coords, "scale_factor")) then + invar = invar * tofloat(invar_coords@scale_factor) + end if + if (isatt(invar_coords, "add_offset")) then + invar = invar + tofloat(invar_coords@add_offset) + end if + + if (VAR(vv).eq."clt") then + invar = 100.0 * invar ; [1] --> [%] + end if + + ; Create output variable + lat = fin->YDim + lon = fin->XDim + output = new((/1, dimsizes(lat), dimsizes(lon)/), float) + output!0 = "time" + output!1 = "lat" + output!2 = "lon" + output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) + output&lat = lat + output&lon = lon + output(0, :, :) = (/invar/) + delete(invar) + delete(invar_coords) + + ; Format coordinates + format_coords(output, year + sprinti("%0.2i", month) + "01", \ + year + sprinti("%0.2i", month) + dm, FREQ(vv)) + + ; Set variable attributes + tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) + delete(output) + output = tmp + delete(tmp) + + ; Calculate coordinate bounds + bounds = guess_coord_bounds(output, FREQ(vv)) + + ; Set global attributes + if (VAR(vv).ne."od550aer") then + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) + else + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) + end if + + ; Output file + DATESTR = \ + year + sprinti("%0.2i", month) + "-" + year + sprinti("%0.2i", month) + fout = output_dir_path + \ + str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ + MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" + + ; Write variable + write_nc(fout, VAR(vv), output, bounds, gAtt) + delete(gAtt) + delete(output) + delete(bounds) + + end do + + end do + +end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl new file mode 100644 index 0000000000..2e02498a88 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl @@ -0,0 +1,253 @@ +; ############################################################################# +; ESMValTool CMORizer for MODIS data +; ############################################################################# +; +; Tier +; Tier 3: restricted dataset. +; +; Source +; https://ladsweb.modaps.eosdis.nasa.gov/search/order +; +; Last access +; 20190209 +; +; Download and processing instructions +; In Products: select "MODIS Aqua", "Collection 6.1" and +; "L3 Atmosphere Product", click on MOD08_M3. +; In Time: select from 2000-01-01 to today. +; In Location: skip, the global domain will be applied. +; In Files: select all. +; Submit the order. +; A registration is required to download the data. +; +; Caveats +; clwvi and clivi data are in-cloud values whereas CMIP5 models provide +; grid-box averages --> multiply MODIS clwvi and clivi values with cloud +; fraction as a first guess +; +; Modification history +; 20180209-righi_mattia: fixed bug in lwpStderr. +; 20180209-hassler_birgit: adapted to v2. +; 20180810-righi_mattia: fix minor calendar issue. +; 20180806-righi_mattia: code cleaning. +; 20170116-lauer_axel: using cirrus fraction to gridbox averages. +; 20160408-lauer_axel: added processing of uncertainties. +; 20151118-lauer_axel: bugfix: added unit conversion. +; 20150430-evaldsson_martin: written. +; +; ############################################################################# +loadscript(getenv("esmvaltool_root") + \ + "/data/formatters/interface.ncl") + +begin + + ; Script name (for logger) + DIAG_SCRIPT = "modis_terra.ncl" + + ; Source name + OBSNAME = "MODIS-Terra" + + ; Tier + TIER = 3 + + ; Selected variable (standard name) + VAR = (/"clwvi", \ + "clivi", \ + "clt", \ + "lwpStderr", \ + "iwpStderr", \ + "od550aer", \ + "reffclwtop"/) + + ; Name in the raw data + NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ + "Cloud_Water_Path_Ice_Mean_Mean", \ + "Cloud_Fraction_Mean_Mean", \ + "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ + "Cloud_Water_Path_Ice_Mean_Uncertainty", \ + "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ + "Cloud_Effective_Radius_Liquid_Mean_Mean"/) + + ; MIP + MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) + + ; Frequency + FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) + + ; Version + VERSION = "MOD08_M3" + + ; CMOR table + CMOR_TABLE = getenv("cmor_tables") + \ + (/"/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/custom/CMOR_lwpStderr.dat", \ + "/custom/CMOR_iwpStderr.dat", \ + "/cmip5/Tables/CMIP5_aero", \ + "/cmip5/Tables/CMIP5_aero"/) + + ; Type + TYPE = "sat" + + ; Global attributes + SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" + REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ + "doi:10.1109/TGRS.2002.808301, 2003." + REF2 = "Levy et al., Atmos. Meas. Tech., " + \ + "doi:10.5194/amt-6-2989-2013, 2013." + COMMENT = "" + +end + +begin + + ; List of files + FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") + + do ff = 0, dimsizes(FILES) - 1 + + fin = addfile(FILES(ff), "r") + + ; Get time + infile = systemfunc("basename " + FILES(ff)) + date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) + year = toint(str_get_cols(tostring(date), 0, 3)) + month = toint(str_get_cols(tostring(date), 4, 5)) + dm = days_in_month(year, month) + + ; Loop over variables to fetch from input file + do vv = 0, dimsizes(VAR) - 1 + + invar = fin->$NAME(vv)$ + invar_fv = invar@_FillValue + invar_coords = invar + invar := tofloat(invar) + invar := where(invar.eq.tofloat(invar_fv), \ + default_fillvalue("float"), invar) + + ; Special case clwvi as the sum lwp + iwp + if (VAR(vv).eq."clwvi") then + if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then + error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") + end if + + ; Read cirrus fraction + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + ; liquid fraction is estimated assuming random overlap, i.e. + ; ctot = 1 - (1 - cif) * (1 - lif) + ; --> lif = 1 - (1 - ctot) / (1 - cif) + delete(cfin) + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cif.gt.0.999, cif@_FillValue, cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif = where(lif.lt.0, 0, lif) + tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path + tmpvar_fv = tmpvar@_FillValue + tmpvar := tofloat(tmpvar) + tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ + default_fillvalue("float"), \ + tmpvar) + tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg + invar = invar * lif ; convert lwp in-cloud value to grid-box avg + invar = invar + tmpvar ; clwvi = lwp + iwp + delete(tmpvar) + delete(lif) + delete(cif) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + ; lwp and iwp are in-cloud values + ; convert lwp/iwp to grid-box averages by multiplying with + ; average cloud fraction (not optimum but best we can do at the moment) + if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. VAR(vv))) then + + ; Read cirrus fraction (0-1) + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + if (VAR(vv).eq."lwpStderr") then + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cf.gt.0.999, cf@_FillValue, cf) + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf = where(cf.lt.0, 0, cf) + delete(cif) + delete(ctot) + end if + invar = invar * cf ; ; "grid-box average" lwp/iwp + delete(cf) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + invar@_FillValue = default_fillvalue("float") + copy_VarCoords(invar_coords, invar) + if (isatt(invar_coords, "scale_factor")) then + invar = invar * tofloat(invar_coords@scale_factor) + end if + if (isatt(invar_coords, "add_offset")) then + invar = invar + tofloat(invar_coords@add_offset) + end if + + if (VAR(vv).eq."clt") then + invar = 100.0 * invar ; [1] --> [%] + end if + + ; Create output variable + lat = fin->YDim + lon = fin->XDim + output = new((/1, dimsizes(lat), dimsizes(lon)/), float) + output!0 = "time" + output!1 = "lat" + output!2 = "lon" + output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) + output&lat = lat + output&lon = lon + output(0, :, :) = (/invar/) + delete(invar) + delete(invar_coords) + + ; Format coordinates + format_coords(output, year + sprinti("%0.2i", month) + "01", \ + year + sprinti("%0.2i", month) + dm, FREQ(vv)) + + ; Set variable attributes + tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) + delete(output) + output = tmp + delete(tmp) + + ; Calculate coordinate bounds + bounds = guess_coord_bounds(output, FREQ(vv)) + + ; Set global attributes + if (VAR(vv).ne."od550aer") then + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) + else + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) + end if + + ; Output file + DATESTR = \ + year + sprinti("%0.2i", month) + "-" + year + sprinti("%0.2i", month) + fout = output_dir_path + \ + str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ + MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" + + ; Write variable + write_nc(fout, VAR(vv), output, bounds, gAtt) + delete(gAtt) + delete(output) + delete(bounds) + + end do + + end do + +end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py new file mode 100644 index 0000000000..80859e31f3 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py @@ -0,0 +1,441 @@ +"""ESMValTool CMORizer for MODIS aqua data. + +Tier + Tier 3 + +Source + https://ladsweb.modaps.eosdis.nasa.gov/search/order + +Requirements + python hdf reader, https://pypi.org/project/pyhdf/ + +Modification history + 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found in at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl + + +""" + +import logging +import os +import re +from datetime import datetime, timedelta + +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES +from pyhdf.SD import SD, SDC + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def collect_files(in_dir: str, cfg) -> list: + """Compose input file list and download if missing.""" + + attrs = cfg["attributes"] + # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} + # in_dir = rootpath / Tier{tier}/{dataset} + in_dir = os.path.join(in_dir) + fname_pattern = ( + f"{attrs['version']}\\.A.*\\.hdf$" # e.g. "MYD08_M3.A*.hdf" + ) + + files_a = next(os.walk(in_dir), (None, None, []))[ + 2 + ] # get list of files in in_dir + files = [] + for filename in files_a: + if re.search(fname_pattern, filename): + files.append(filename) + + if len(files) == 0: + raise Exception("No files found", in_dir) + + return files + + +def get_year_from_filepath(filepath: str, cfg) -> int: + """get year from first four characters of last section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + year = int(basename[offset : offset + 4]) + return year + + +def get_month_from_filepath(filepath: str, cfg) -> int: + """get month from timerange section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + doy = int(basename[offset + 4 : offset + 7]) # day of year + year = int(basename[offset : offset + 4]) + # Convert day-of-year to month + date = datetime(year, 1, 1) + timedelta(days=doy - 1) + month = date.month + logger.debug( + f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" + ) + return month + + +def group_files_by_year(filepaths: list, cfg) -> dict: + """group filepaths by year using get_year_from_filepath function""" + years_D = dict() + for filename in filepaths: + year = get_year_from_filepath(filename, cfg) + + if year not in years_D: + years_D[year] = [filename] + else: + years_D[year].append(filename) + + return years_D + + +def read_hdf( + in_dir: str, + filepath: str, + year: int, + month: int, + raw_name: str, + short_name: str, + **extras, +): + """Read HDF file and build iris cube with auxiliary data for special variables.""" + f = SD(os.path.join(in_dir, filepath), SDC.READ) + + data_obj = f.select(raw_name) + data = data_obj.get().astype(np.float32) + + # mask fill values + if hasattr(data_obj, "fill_value"): + data[data == data_obj.fill_value] = np.nan + + # mask values outside valid range + if hasattr(data_obj, "valid_range"): + valid_range = data_obj.valid_range + logger.debug(f"Masking {short_name} data outside range {valid_range}") + data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan + + # apply scale factor and add offset if they exist + if hasattr(data_obj, "scale_factor"): + scale_factor = data_obj.scale_factor + logger.debug(f"Scaling {short_name} data by {scale_factor}") + data *= scale_factor + if hasattr(data_obj, "add_offset"): + add_offset = data_obj.add_offset + logger.debug(f"Offsetting {short_name} data by {add_offset}") + data += add_offset + + # for certain variables, we need to read additional datasets to compute the final variable + + # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp + if short_name in ["clwvi"]: + cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfin = cfirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset + cif[cif > 0.999] = np.nan + + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + # liquid fraction estimated assuming random overlap: + # ctot = 1 - (1 - cif) * (1 - lif) + # --> lif = 1 - (1 - ctot) / (1 - cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif[lif < 0] = 0 + + # ice water path + cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") + cwpimm = cwpimm_obj.get().astype(np.float32) + cwpimm_fv = cwpimm_obj._FillValue + cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values + + # convert iwp in-cloud to gridbox avg using masked cirrus fraction + iwp = cwpimm * cif + + # liquid water path + # convert lwp in-cloud value to grid-box avg + lwp = data * lif + + data = lwp + iwp + + # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average + elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: + # cirrus fraction (0-1) + cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfswirFm = cfswirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset + cf[cf > 0.999] = np.nan + + # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average + if short_name in ["lwpStderr"]: + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + cif = cf + cif[cf > 0.999] = np.nan + + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf[cf < 0] = 0 + + data *= cf # "grid-box average" lwp/iwp + + # mask all null values (nan, inf) + data = np.ma.masked_invalid(data) + + # Handle units - convert invalid units to '1' + units_str = data_obj.attributes().get("units", "1") + if units_str in ["none", "None", ""]: + units_str = "1" + + # Create time coordinate for the start of the month, with bounds covering the whole month + time_point = datetime( + year=year, month=month, day=15 + ) # use 15th as representative time for the month + time_bounds_lower = datetime(year=year, month=month, day=1) + time_bounds_upper = datetime( + year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 + ) - timedelta( + days=1 + ) # end of month is the day before the first day of the next month + + # Convert time to days since 1850-01-01 + time_units = datetime(1850, 1, 1) + delta_1850 = (time_point - time_units).days + # After creating time delta, add bounds: + delta_bounds_lower = (time_bounds_lower - time_units).days + delta_bounds_upper = (time_bounds_upper - time_units).days + + time_coord = iris.coords.DimCoord( + points=[delta_1850], + standard_name="time", + units="days since 1850-01-01 00:00:00", + bounds=[[delta_bounds_lower, delta_bounds_upper]], + ) + data = data[np.newaxis, :, :] # Add time dimension at start + + lats = f.select("YDim").get().astype(np.float64) + lat_coord = iris.coords.DimCoord( + lats, + standard_name="latitude", + units="degrees_north", + bounds=[ + ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid + lats[i] - 0.5 * np.abs(lats[1] - lats[0]), + lats[i] + 0.5 * np.abs(lats[1] - lats[0]), + ) + for i in range(len(lats)) + ], + ) + + lons = f.select("XDim").get().astype(np.float64) + lon_coord = iris.coords.DimCoord( + lons, + standard_name="longitude", + units="degrees_east", + bounds=[ + ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid + lons[i] - 0.5 * np.abs(lons[1] - lons[0]), + lons[i] + 0.5 * np.abs(lons[1] - lons[0]), + ) + for i in range(len(lons)) + ], + ) + + if short_name == "od550aer": + # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) + wavelength_coord = iris.coords.AuxCoord( + [550], + standard_name="radiation_wavelength", + var_name="wavelength", + units="nm", + ) + aux_coords_and_dims = [ + ( + wavelength_coord, + None, + ), # wavelength is a scalar auxiliary coordinate + ] + else: + aux_coords_and_dims = ( + None # no auxiliary coordinates for other variables + ) + + cube = iris.cube.Cube( + data, + long_name=data_obj.attributes().get("long_name", raw_name), + var_name=short_name, + units=units_str, + dim_coords_and_dims=[ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ], + aux_coords_and_dims=aux_coords_and_dims, + ) + + return cube + + +def convert( + cube: iris.cube.Cube, + cmor_info, + attrs, +) -> iris.cube.Cube: + """Convert cube to CMOR standards based on data type and cmor_info.""" + + if attrs.get("reference") is None: + attrs["reference"] = cmor_info.attributes["reference"] + + cube.convert_units(cmor_info.units) + + if cube.coord("longitude").points.min() < 0: + # convert from [-180, 180] to [0, 360] + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) + nlon = len(cube.coord("longitude").points) + dim_lon = 2 + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) + + if np.diff(cube.coord("latitude").points)[0] < 0: + # convert [90,-90] to [-90,90] + cube.coord("latitude").points = cube.coord("latitude").points[::-1] + # flip the data + cube.data = cube.data[:, ::-1, :] # latitude is axis=1 + + utils.set_global_atts(cube, attrs) + utils.fix_var_metadata(cube, cmor_info) + utils.fix_dim_coordnames(cube) + + utils.fix_coords(cube) + + return cube + + +def _extract_variable( + filepaths: list, + var_d, + attrs, + cmor_table, + cfg, + in_dir: str, + out_dir: str, + year: int, +) -> None: + """Extract variable data from input files, convert to CMOR standards, and save.""" + + # add mip to attributes for use in cmor table lookup and global attributes + attrs["mip"] = var_d["mip"] + + # get cmor table for variable + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + + # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table + if var_d.get("reference"): + attrs["reference"] = var_d.get("reference") + + if not cmor_table: + raise Exception( + f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. Valid options: {", ".join(list(CMOR_TABLES.keys()))}' + ) + + # check if cmor info was found for the variable, if not log an error and skip + if cmor_info is None: + logger.error( + f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" + ) + return + logger.debug( + f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" + ) + + logger.info( + f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" + ) + + cubes = iris.cube.CubeList() + for filepath in filepaths: + month = get_month_from_filepath(filepath, cfg) + logger.debug( + f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" + ) + cube = read_hdf(in_dir, filepath, year, month, **var_d) + + logger.debug( + f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" + ) + cubes.append(cube) + + if len(cubes) > 0: + cube = cubes.concatenate()[ + 0 + ] # concatenate along time dimension and take the resulting cube + else: + cube = cubes[0] + + cube = convert(cube, cmor_info, attrs) + + # add dataset_id to global attributes for provenance + cube_attrs = cube.attributes.globals + cube_attrs["dataset_id"] = attrs["dataset_id"] + logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") + + utils.save_variable( + cube, + var_d.get("short_name"), + out_dir, + cube_attrs, + unlimited_dimensions=["time"], + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for MODIS.""" + + cmor_table = cfg["cmor_table"] + glob_attrs = cfg["attributes"] + + # get relevant files + filepaths = collect_files(in_dir, cfg) + + # group by year + years_d = group_files_by_year(filepaths, cfg) + years = sorted(list(years_d.keys())) + + # iterate over the variables to be CMORized + for var, var_d in cfg["variables"].items(): + logger.info( + "CMORizing var %s from input set %s", var, var_d["raw_name"] + ) + var_d["short_name"] = var + logger.debug("\n" + format(var_d)) + + # cmorize by year, merging all months at the end + for year in years: + _extract_variable( + years_d[year], + var_d, + glob_attrs, + cmor_table, + cfg, + in_dir, + out_dir, + year, + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py new file mode 100644 index 0000000000..53e942be64 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py @@ -0,0 +1,441 @@ +"""ESMValTool CMORizer for MODIS terra data. + +Tier + Tier 3 + +Source + https://ladsweb.modaps.eosdis.nasa.gov/search/order + +Requirements + python hdf reader, https://pypi.org/project/pyhdf/ + +Modification history + 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found in at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl + + +""" + +import logging +import os +import re +from datetime import datetime, timedelta + +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES +from pyhdf.SD import SD, SDC + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def collect_files(in_dir: str, cfg) -> list: + """Compose input file list and download if missing.""" + + attrs = cfg["attributes"] + # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} + # in_dir = rootpath / Tier{tier}/{dataset} + in_dir = os.path.join(in_dir) + fname_pattern = ( + f"{attrs['version']}\\.A.*\\.hdf$" # e.g. "MOD08_M3.A*.hdf" + ) + + files_a = next(os.walk(in_dir), (None, None, []))[ + 2 + ] # get list of files in in_dir + files = [] + for filename in files_a: + if re.search(fname_pattern, filename): + files.append(filename) + + if len(files) == 0: + raise Exception("No files found", in_dir) + + return files + + +def get_year_from_filepath(filepath: str, cfg) -> int: + """get year from first four characters of last section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + year = int(basename[offset : offset + 4]) + return year + + +def get_month_from_filepath(filepath: str, cfg) -> int: + """get month from timerange section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + doy = int(basename[offset + 4 : offset + 7]) # day of year + year = int(basename[offset : offset + 4]) + # Convert day-of-year to month + date = datetime(year, 1, 1) + timedelta(days=doy - 1) + month = date.month + logger.debug( + f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" + ) + return month + + +def group_files_by_year(filepaths: list, cfg) -> dict: + """group filepaths by year using get_year_from_filepath function""" + years_D = dict() + for filename in filepaths: + year = get_year_from_filepath(filename, cfg) + + if year not in years_D: + years_D[year] = [filename] + else: + years_D[year].append(filename) + + return years_D + + +def read_hdf( + in_dir: str, + filepath: str, + year: int, + month: int, + raw_name: str, + short_name: str, + **extras, +): + """Read HDF file and build iris cube with auxiliary data for special variables.""" + f = SD(os.path.join(in_dir, filepath), SDC.READ) + + data_obj = f.select(raw_name) + data = data_obj.get().astype(np.float32) + + # mask fill values + if hasattr(data_obj, "fill_value"): + data[data == data_obj.fill_value] = np.nan + + # mask values outside valid range + if hasattr(data_obj, "valid_range"): + valid_range = data_obj.valid_range + logger.debug(f"Masking {short_name} data outside range {valid_range}") + data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan + + # apply scale factor and add offset if they exist + if hasattr(data_obj, "scale_factor"): + scale_factor = data_obj.scale_factor + logger.debug(f"Scaling {short_name} data by {scale_factor}") + data *= scale_factor + if hasattr(data_obj, "add_offset"): + add_offset = data_obj.add_offset + logger.debug(f"Offsetting {short_name} data by {add_offset}") + data += add_offset + + # for certain variables, we need to read additional datasets to compute the final variable + + # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp + if short_name in ["clwvi"]: + cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfin = cfirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset + cif[cif > 0.999] = np.nan + + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + # liquid fraction estimated assuming random overlap: + # ctot = 1 - (1 - cif) * (1 - lif) + # --> lif = 1 - (1 - ctot) / (1 - cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif[lif < 0] = 0 + + # ice water path + cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") + cwpimm = cwpimm_obj.get().astype(np.float32) + cwpimm_fv = cwpimm_obj._FillValue + cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values + + # convert iwp in-cloud to gridbox avg using masked cirrus fraction + iwp = cwpimm * cif + + # liquid water path + # convert lwp in-cloud value to grid-box avg + lwp = data * lif + + data = lwp + iwp + + # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average + elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: + # cirrus fraction (0-1) + cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfswirFm = cfswirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset + cf[cf > 0.999] = np.nan + + # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average + if short_name in ["lwpStderr"]: + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + cif = cf + cif[cf > 0.999] = np.nan + + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf[cf < 0] = 0 + + data *= cf # "grid-box average" lwp/iwp + + # mask all null values (nan, inf) + data = np.ma.masked_invalid(data) + + # Handle units - convert invalid units to '1' + units_str = data_obj.attributes().get("units", "1") + if units_str in ["none", "None", ""]: + units_str = "1" + + # Create time coordinate for the start of the month, with bounds covering the whole month + time_point = datetime( + year=year, month=month, day=15 + ) # use 15th as representative time for the month + time_bounds_lower = datetime(year=year, month=month, day=1) + time_bounds_upper = datetime( + year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 + ) - timedelta( + days=1 + ) # end of month is the day before the first day of the next month + + # Convert time to days since 1850-01-01 + time_units = datetime(1850, 1, 1) + delta_1850 = (time_point - time_units).days + # After creating time delta, add bounds: + delta_bounds_lower = (time_bounds_lower - time_units).days + delta_bounds_upper = (time_bounds_upper - time_units).days + + time_coord = iris.coords.DimCoord( + points=[delta_1850], + standard_name="time", + units="days since 1850-01-01 00:00:00", + bounds=[[delta_bounds_lower, delta_bounds_upper]], + ) + data = data[np.newaxis, :, :] # Add time dimension at start + + lats = f.select("YDim").get().astype(np.float64) + lat_coord = iris.coords.DimCoord( + lats, + standard_name="latitude", + units="degrees_north", + bounds=[ + ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid + lats[i] - 0.5 * np.abs(lats[1] - lats[0]), + lats[i] + 0.5 * np.abs(lats[1] - lats[0]), + ) + for i in range(len(lats)) + ], + ) + + lons = f.select("XDim").get().astype(np.float64) + lon_coord = iris.coords.DimCoord( + lons, + standard_name="longitude", + units="degrees_east", + bounds=[ + ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid + lons[i] - 0.5 * np.abs(lons[1] - lons[0]), + lons[i] + 0.5 * np.abs(lons[1] - lons[0]), + ) + for i in range(len(lons)) + ], + ) + + if short_name == "od550aer": + # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) + wavelength_coord = iris.coords.AuxCoord( + [550], + standard_name="radiation_wavelength", + var_name="wavelength", + units="nm", + ) + aux_coords_and_dims = [ + ( + wavelength_coord, + None, + ), # wavelength is a scalar auxiliary coordinate + ] + else: + aux_coords_and_dims = ( + None # no auxiliary coordinates for other variables + ) + + cube = iris.cube.Cube( + data, + long_name=data_obj.attributes().get("long_name", raw_name), + var_name=short_name, + units=units_str, + dim_coords_and_dims=[ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ], + aux_coords_and_dims=aux_coords_and_dims, + ) + + return cube + + +def convert( + cube: iris.cube.Cube, + cmor_info, + attrs, +) -> iris.cube.Cube: + """Convert cube to CMOR standards based on data type and cmor_info.""" + + if attrs.get("reference") is None: + attrs["reference"] = cmor_info.attributes["reference"] + + cube.convert_units(cmor_info.units) + + if cube.coord("longitude").points.min() < 0: + # convert from [-180, 180] to [0, 360] + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) + nlon = len(cube.coord("longitude").points) + dim_lon = 2 + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) + + if np.diff(cube.coord("latitude").points)[0] < 0: + # convert [90,-90] to [-90,90] + cube.coord("latitude").points = cube.coord("latitude").points[::-1] + # flip the data + cube.data = cube.data[:, ::-1, :] # latitude is axis=1 + + utils.set_global_atts(cube, attrs) + utils.fix_var_metadata(cube, cmor_info) + utils.fix_dim_coordnames(cube) + + utils.fix_coords(cube) + + return cube + + +def _extract_variable( + filepaths: list, + var_d, + attrs, + cmor_table, + cfg, + in_dir: str, + out_dir: str, + year: int, +) -> None: + """Extract variable data from input files, convert to CMOR standards, and save.""" + + # add mip to attributes for use in cmor table lookup and global attributes + attrs["mip"] = var_d["mip"] + + # get the cmor table for the variable + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + + # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table + if var_d.get("reference"): + attrs["reference"] = var_d.get("reference") + + if not cmor_table: + raise Exception( + f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. Valid options: {", ".join(list(CMOR_TABLES.keys()))}' + ) + + # check if cmor info was found for the variable, if not log an error and skip + if cmor_info is None: + logger.error( + f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" + ) + return + logger.debug( + f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" + ) + + logger.info( + f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" + ) + + cubes = iris.cube.CubeList() + for filepath in filepaths: + month = get_month_from_filepath(filepath, cfg) + logger.debug( + f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" + ) + cube = read_hdf(in_dir, filepath, year, month, **var_d) + + logger.debug( + f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" + ) + cubes.append(cube) + + if len(cubes) > 0: + cube = cubes.concatenate()[ + 0 + ] # concatenate along time dimension and take the resulting cube + else: + cube = cubes[0] + + cube = convert(cube, cmor_info, attrs) + + # add dataset_id to global attributes for provenance + cube_attrs = cube.attributes.globals + cube_attrs["dataset_id"] = attrs["dataset_id"] + logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") + + utils.save_variable( + cube, + var_d.get("short_name"), + out_dir, + cube_attrs, + unlimited_dimensions=["time"], + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for MODIS.""" + + cmor_table = cfg["cmor_table"] + glob_attrs = cfg["attributes"] + + # get relevant files + filepaths = collect_files(in_dir, cfg) + + # group by year + years_d = group_files_by_year(filepaths, cfg) + years = sorted(list(years_d.keys())) + + # iterate over the variables to be CMORized + for var, var_d in cfg["variables"].items(): + logger.info( + "CMORizing var %s from input set %s", var, var_d["raw_name"] + ) + var_d["short_name"] = var + logger.debug("\n" + format(var_d)) + + # cmorize by year, merging all months at the end + for year in years: + _extract_variable( + years_d[year], + var_d, + glob_attrs, + cmor_table, + cfg, + in_dir, + out_dir, + year, + ) diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml index 939696123f..f0aed75a6d 100644 --- a/esmvaltool/config-references.yml +++ b/esmvaltool/config-references.yml @@ -229,6 +229,10 @@ authors: name: Winterstein, Franziska institute: DLR, Germany orcid: https://orcid.org/0000-0002-2406-4936 + fruttarol_noah: + name: Fruttarol, Noah + institute: CCCma, ECCC, Canada + github: noahfruttarol fuckar_neven: name: Fuckar, Neven institute: BSC, Spain diff --git a/pyproject.toml b/pyproject.toml index c31d67b3e9..9dcb1d5aca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ dependencies = [ "packaging", "pandas", "progressbar2", + "pyhdf", "pyproj>=2.1", "pys2index", "python-dateutil",