From 76025d67b46364a905ffa94be4cc540491371f12 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Tue, 17 Feb 2026 00:41:20 +0000 Subject: [PATCH 01/13] Created python implimentaions for cmorizing MODIS terra and aqua data in cmip6 --- .../cmorizers/data/cmor_config/MODIS-Aqua.yml | 49 ++ .../data/cmor_config/MODIS-Terra.yml | 49 ++ .../data/formatters/datasets/_modis_aqua.ncl | 253 ++++++++++ .../data/formatters/datasets/_modis_terra.ncl | 253 ++++++++++ .../data/formatters/datasets/modis_aqua.py | 447 ++++++++++++++++++ .../data/formatters/datasets/modis_terra.py | 447 ++++++++++++++++++ pyproject.toml | 1 + 7 files changed, 1499 insertions(+) create mode 100644 esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml create mode 100644 esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml new file mode 100644 index 0000000000..29329712c9 --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml @@ -0,0 +1,49 @@ +--- +# Author: 2026-01-29 Kristi Webb + +# Common global attributes for Cmorizer output +attributes: + + project_id: CMIP6 + dataset_id: MODIS-Aqua + modeling_realm: atmos + type: sat + tier: 3 + version: MYD08_M3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + reference: modis1 + + filename_fmt: "MYD08_M3.A*.hdf" + +variables: + clwvi: + raw_name: Cloud_Water_Path_Liquid_Mean_Mean + mip: Amon + frequency: mon + clivi: + raw_name: Cloud_Water_Path_Ice_Mean_Mean + mip: Amon + frequency: mon + clt: + raw_name: Cloud_Fraction_Mean_Mean + mip: Amon + frequency: mon + lwpStderr: + raw_name: 
Cloud_Water_Path_Liquid_Mean_Uncertainty + mip: Amon + frequency: mon + project_id: custom + iwpStderr: + raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty + mip: Amon + frequency: mon + project_id: custom + od550aer: + raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + mip: AERmon + frequency: mon + reference: modis2 + reffclwtop: + raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean + mip: AERmon + frequency: mon diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml new file mode 100644 index 0000000000..94a3a686cc --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml @@ -0,0 +1,49 @@ +--- +# Author: 2026-01-29 Kristi Webb + +# Common global attributes for Cmorizer output +attributes: + + project_id: CMIP6 + dataset_id: MODIS-Terra + modeling_realm: atmos + type: sat + tier: 3 + version: MOD08_M3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + reference: modis1 + + filename_fmt: "MOD08_M3.A*.hdf" + +variables: + clwvi: + raw_name: Cloud_Water_Path_Liquid_Mean_Mean + mip: Amon + frequency: mon + clivi: + raw_name: Cloud_Water_Path_Ice_Mean_Mean + mip: Amon + frequency: mon + clt: + raw_name: Cloud_Fraction_Mean_Mean + mip: Amon + frequency: mon + lwpStderr: + raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty + mip: Amon + frequency: mon + project_id: custom + iwpStderr: + raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty + mip: Amon + frequency: mon + project_id: custom + od550aer: + raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean + mip: AERmon + frequency: mon + reference: modis2 + reffclwtop: + raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean + mip: AERmon + frequency: mon diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl new file mode 100644 index 0000000000..f1829bd6b6 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl @@ 
-0,0 +1,253 @@ +; ############################################################################# +; ESMValTool CMORizer for MODIS data +; ############################################################################# +; +; Tier +; Tier 3: restricted dataset. +; +; Source +; https://ladsweb.modaps.eosdis.nasa.gov/search/order +; +; Last access +; 20190209 +; +; Download and processing instructions +; In Products: select "MODIS Aqua", "Collection 6.1" and +; "L3 Atmosphere Product", click on MYD08_M3. +; In Time: select from 2000-01-01 to today. +; In Location: skip, the global domain will be applied. +; In Files: select all. +; Submit the order. +; A registration is required to download the data. +; +; Caveats +; clwvi and clivi data are in-cloud values whereas CMIP5 models provide +; grid-box averages --> multiply MODIS clwvi and clivi values with cloud +; fraction as a first guess +; +; Modification history +; 20180209-righi_mattia: fixed bug in lwpStderr. +; 20180209-hassler_birgit: adapted to v2. +; 20180810-righi_mattia: fix minor calendar issue. +; 20180806-righi_mattia: code cleaning. +; 20170116-lauer_axel: using cirrus fraction to gridbox averages. +; 20160408-lauer_axel: added processing of uncertainties. +; 20151118-lauer_axel: bugfix: added unit conversion. +; 20150430-evaldsson_martin: written. 
+; +; ############################################################################# +loadscript(getenv("esmvaltool_root") + \ + "/data/formatters/interface.ncl") + +begin + + ; Script name (for logger) + DIAG_SCRIPT = "modis_aqua.ncl" + + ; Source name + OBSNAME = "MODIS-Aqua" + + ; Tier + TIER = 3 + + ; Selected variable (standard name) + VAR = (/"clwvi", \ + "clivi", \ + "clt", \ + "lwpStderr", \ + "iwpStderr", \ + "od550aer", \ + "reffclwtop"/) + + ; Name in the raw data + NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ + "Cloud_Water_Path_Ice_Mean_Mean", \ + "Cloud_Fraction_Mean_Mean", \ + "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ + "Cloud_Water_Path_Ice_Mean_Uncertainty", \ + "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ + "Cloud_Effective_Radius_Liquid_Mean_Mean"/) + + ; MIP + MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) + + ; Frequency + FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) + + ; Version + VERSION = "MYD08_M3" + + ; CMOR table + CMOR_TABLE = getenv("cmor_tables") + \ + (/"/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/custom/CMOR_lwpStderr.dat", \ + "/custom/CMOR_iwpStderr.dat", \ + "/cmip5/Tables/CMIP5_aero", \ + "/cmip5/Tables/CMIP5_aero"/) + + ; Type + TYPE = "sat" + + ; Global attributes + SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" + REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ + "doi:10.1109/TGRS.2002.808301, 2003." + REF2 = "Levy et al., Atmos. Meas. Tech., " + \ + "doi:10.5194/amt-6-2989-2013, 2013." 
+ COMMENT = "" + +end + +begin + + ; List of files + FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") + + do ff = 0, dimsizes(FILES) - 1 + + fin = addfile(FILES(ff), "r") + + ; Get time + infile = systemfunc("basename " + FILES(ff)) + date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) + year = toint(str_get_cols(tostring(date), 0, 3)) + month = toint(str_get_cols(tostring(date), 4, 5)) + dm = days_in_month(year, month) + + ; Loop over variables to fetch from input file + do vv = 0, dimsizes(VAR) - 1 + + invar = fin->$NAME(vv)$ + invar_fv = invar@_FillValue + invar_coords = invar + invar := tofloat(invar) + invar := where(invar.eq.tofloat(invar_fv), \ + default_fillvalue("float"), invar) + + ; Special case clwvi as the sum lwp + iwp + if (VAR(vv).eq."clwvi") then + if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then + error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") + end if + + ; Read cirrus fraction + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + ; liquid fraction is estimated assuming random overlap, i.e. 
+ ; ctot = 1 - (1 - cif) * (1 - lif) + ; --> lif = 1 - (1 - ctot) / (1 - cif) + delete(cfin) + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cif.gt.0.999, cif@_FillValue, cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif = where(lif.lt.0, 0, lif) + tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path + tmpvar_fv = tmpvar@_FillValue + tmpvar := tofloat(tmpvar) + tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ + default_fillvalue("float"), \ + tmpvar) + tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg + invar = invar * lif ; convert lwp in-cloud value to grid-box avg + invar = invar + tmpvar ; clwvi = lwp + iwp + delete(tmpvar) + delete(lif) + delete(cif) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + ; lwp and iwp are in-cloud values + ; convert lwp/iwp to grid-box averages by multiplying with + ; average cloud fraction (not optimum but best we can do at the moment) + if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. 
VAR(vv))) then + + ; Read cirrus fraction (0-1) + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + if (VAR(vv).eq."lwpStderr") then + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cf.gt.0.999, cf@_FillValue, cf) + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf = where(cf.lt.0, 0, cf) + delete(cif) + delete(ctot) + end if + invar = invar * cf ; ; "grid-box average" lwp/iwp + delete(cf) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + invar@_FillValue = default_fillvalue("float") + copy_VarCoords(invar_coords, invar) + if (isatt(invar_coords, "scale_factor")) then + invar = invar * tofloat(invar_coords@scale_factor) + end if + if (isatt(invar_coords, "add_offset")) then + invar = invar + tofloat(invar_coords@add_offset) + end if + + if (VAR(vv).eq."clt") then + invar = 100.0 * invar ; [1] --> [%] + end if + + ; Create output variable + lat = fin->YDim + lon = fin->XDim + output = new((/1, dimsizes(lat), dimsizes(lon)/), float) + output!0 = "time" + output!1 = "lat" + output!2 = "lon" + output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) + output&lat = lat + output&lon = lon + output(0, :, :) = (/invar/) + delete(invar) + delete(invar_coords) + + ; Format coordinates + format_coords(output, year + sprinti("%0.2i", month) + "01", \ + year + sprinti("%0.2i", month) + dm, FREQ(vv)) + + ; Set variable attributes + tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) + delete(output) + output = tmp + delete(tmp) + + ; Calculate coordinate bounds + bounds = guess_coord_bounds(output, FREQ(vv)) + + ; Set global attributes + if (VAR(vv).ne."od550aer") then + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) + else + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) + end if + + ; Output file + DATESTR = \ + year + sprinti("%0.2i", month) + 
"-" + year + sprinti("%0.2i", month) + fout = output_dir_path + \ + str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ + MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" + + ; Write variable + write_nc(fout, VAR(vv), output, bounds, gAtt) + delete(gAtt) + delete(output) + delete(bounds) + + end do + + end do + +end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl new file mode 100644 index 0000000000..2e02498a88 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl @@ -0,0 +1,253 @@ +; ############################################################################# +; ESMValTool CMORizer for MODIS data +; ############################################################################# +; +; Tier +; Tier 3: restricted dataset. +; +; Source +; https://ladsweb.modaps.eosdis.nasa.gov/search/order +; +; Last access +; 20190209 +; +; Download and processing instructions +; In Products: select "MODIS Aqua", "Collection 6.1" and +; "L3 Atmosphere Product", click on MOD08_M3. +; In Time: select from 2000-01-01 to today. +; In Location: skip, the global domain will be applied. +; In Files: select all. +; Submit the order. +; A registration is required to download the data. +; +; Caveats +; clwvi and clivi data are in-cloud values whereas CMIP5 models provide +; grid-box averages --> multiply MODIS clwvi and clivi values with cloud +; fraction as a first guess +; +; Modification history +; 20180209-righi_mattia: fixed bug in lwpStderr. +; 20180209-hassler_birgit: adapted to v2. +; 20180810-righi_mattia: fix minor calendar issue. +; 20180806-righi_mattia: code cleaning. +; 20170116-lauer_axel: using cirrus fraction to gridbox averages. +; 20160408-lauer_axel: added processing of uncertainties. +; 20151118-lauer_axel: bugfix: added unit conversion. +; 20150430-evaldsson_martin: written. 
+; +; ############################################################################# +loadscript(getenv("esmvaltool_root") + \ + "/data/formatters/interface.ncl") + +begin + + ; Script name (for logger) + DIAG_SCRIPT = "modis_terra.ncl" + + ; Source name + OBSNAME = "MODIS-Terra" + + ; Tier + TIER = 3 + + ; Selected variable (standard name) + VAR = (/"clwvi", \ + "clivi", \ + "clt", \ + "lwpStderr", \ + "iwpStderr", \ + "od550aer", \ + "reffclwtop"/) + + ; Name in the raw data + NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ + "Cloud_Water_Path_Ice_Mean_Mean", \ + "Cloud_Fraction_Mean_Mean", \ + "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ + "Cloud_Water_Path_Ice_Mean_Uncertainty", \ + "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ + "Cloud_Effective_Radius_Liquid_Mean_Mean"/) + + ; MIP + MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) + + ; Frequency + FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) + + ; Version + VERSION = "MOD08_M3" + + ; CMOR table + CMOR_TABLE = getenv("cmor_tables") + \ + (/"/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/cmip5/Tables/CMIP5_Amon", \ + "/custom/CMOR_lwpStderr.dat", \ + "/custom/CMOR_iwpStderr.dat", \ + "/cmip5/Tables/CMIP5_aero", \ + "/cmip5/Tables/CMIP5_aero"/) + + ; Type + TYPE = "sat" + + ; Global attributes + SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" + REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ + "doi:10.1109/TGRS.2002.808301, 2003." + REF2 = "Levy et al., Atmos. Meas. Tech., " + \ + "doi:10.5194/amt-6-2989-2013, 2013." 
+ COMMENT = "" + +end + +begin + + ; List of files + FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") + + do ff = 0, dimsizes(FILES) - 1 + + fin = addfile(FILES(ff), "r") + + ; Get time + infile = systemfunc("basename " + FILES(ff)) + date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) + year = toint(str_get_cols(tostring(date), 0, 3)) + month = toint(str_get_cols(tostring(date), 4, 5)) + dm = days_in_month(year, month) + + ; Loop over variables to fetch from input file + do vv = 0, dimsizes(VAR) - 1 + + invar = fin->$NAME(vv)$ + invar_fv = invar@_FillValue + invar_coords = invar + invar := tofloat(invar) + invar := where(invar.eq.tofloat(invar_fv), \ + default_fillvalue("float"), invar) + + ; Special case clwvi as the sum lwp + iwp + if (VAR(vv).eq."clwvi") then + if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then + error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") + end if + + ; Read cirrus fraction + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + ; liquid fraction is estimated assuming random overlap, i.e. 
+ ; ctot = 1 - (1 - cif) * (1 - lif) + ; --> lif = 1 - (1 - ctot) / (1 - cif) + delete(cfin) + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cif.gt.0.999, cif@_FillValue, cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif = where(lif.lt.0, 0, lif) + tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path + tmpvar_fv = tmpvar@_FillValue + tmpvar := tofloat(tmpvar) + tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ + default_fillvalue("float"), \ + tmpvar) + tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg + invar = invar * lif ; convert lwp in-cloud value to grid-box avg + invar = invar + tmpvar ; clwvi = lwp + iwp + delete(tmpvar) + delete(lif) + delete(cif) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + ; lwp and iwp are in-cloud values + ; convert lwp/iwp to grid-box averages by multiplying with + ; average cloud fraction (not optimum but best we can do at the moment) + if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. 
VAR(vv))) then + + ; Read cirrus fraction (0-1) + ; cfin = fin->Cirrus_Fraction_SWIR_FMean + cfin = fin->Cirrus_Fraction_Infrared_FMean + cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + if (VAR(vv).eq."lwpStderr") then + cfin = fin->Cloud_Fraction_Mean_Mean + ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) + delete(cfin) + cif = where(cf.gt.0.999, cf@_FillValue, cf) + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf = where(cf.lt.0, 0, cf) + delete(cif) + delete(ctot) + end if + invar = invar * cf ; ; "grid-box average" lwp/iwp + delete(cf) + invar = 0.001 * invar ; [g/m2] --> [kg/m2] + end if + + invar@_FillValue = default_fillvalue("float") + copy_VarCoords(invar_coords, invar) + if (isatt(invar_coords, "scale_factor")) then + invar = invar * tofloat(invar_coords@scale_factor) + end if + if (isatt(invar_coords, "add_offset")) then + invar = invar + tofloat(invar_coords@add_offset) + end if + + if (VAR(vv).eq."clt") then + invar = 100.0 * invar ; [1] --> [%] + end if + + ; Create output variable + lat = fin->YDim + lon = fin->XDim + output = new((/1, dimsizes(lat), dimsizes(lon)/), float) + output!0 = "time" + output!1 = "lat" + output!2 = "lon" + output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) + output&lat = lat + output&lon = lon + output(0, :, :) = (/invar/) + delete(invar) + delete(invar_coords) + + ; Format coordinates + format_coords(output, year + sprinti("%0.2i", month) + "01", \ + year + sprinti("%0.2i", month) + dm, FREQ(vv)) + + ; Set variable attributes + tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) + delete(output) + output = tmp + delete(tmp) + + ; Calculate coordinate bounds + bounds = guess_coord_bounds(output, FREQ(vv)) + + ; Set global attributes + if (VAR(vv).ne."od550aer") then + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) + else + gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) + end if + + ; Output file + DATESTR = \ + year + sprinti("%0.2i", month) + 
"-" + year + sprinti("%0.2i", month) + fout = output_dir_path + \ + str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ + MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" + + ; Write variable + write_nc(fout, VAR(vv), output, bounds, gAtt) + delete(gAtt) + delete(output) + delete(bounds) + + end do + + end do + +end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py new file mode 100644 index 0000000000..f43bdc8d2b --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py @@ -0,0 +1,447 @@ +"""ESMValTool CMORizer for MODIS aqua data. + +Tier + Tier 3 + +Source + https://ladsweb.modaps.eosdis.nasa.gov/search/order + +Requirements + python hdf reader, https://pypi.org/project/pyhdf/ + +Modification history + 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found in at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl + + +""" + +import logging +import os +import re +from datetime import datetime, timedelta + +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES +from pyhdf.SD import SD, SDC + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def collect_files(in_dir: str, cfg) -> list: + """Compose input file list and download if missing.""" + + attrs = cfg["attributes"] + # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} + # in_dir = rootpath / Tier{tier}/{dataset} + in_dir = os.path.join(in_dir) + fname_pattern = ( + f"{attrs['version']}\\.A.*\\.hdf$" # e.g. 
"MYD08_M3.A*.hdf" + ) + + files_a = next(os.walk(in_dir), (None, None, []))[ + 2 + ] # get list of files in in_dir + files = [] + for filename in files_a: + if re.search(fname_pattern, filename): + files.append(filename) + + if len(files) == 0: + raise Exception("No files found", in_dir) + + return files + + +def get_year_from_filepath(filepath: str, cfg) -> int: + """get year from first four characters of last section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + year = int(basename[offset : offset + 4]) + return year + + +def get_month_from_filepath(filepath: str, cfg) -> int: + """get month from timerange section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + doy = int(basename[offset + 4 : offset + 7]) # day of year + year = int(basename[offset : offset + 4]) + # Convert day-of-year to month + date = datetime(year, 1, 1) + timedelta(days=doy - 1) + month = date.month + logger.debug( + f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" + ) + return month + + +def group_files_by_year(filepaths: list, cfg) -> dict: + """group filepaths by year using get_year_from_filepath function""" + years_D = dict() + for filename in filepaths: + year = get_year_from_filepath(filename, cfg) + + if year not in years_D: + years_D[year] = [filename] + else: + years_D[year].append(filename) + + return years_D + + +def read_hdf( + in_dir: str, + filepath: str, + year: int, + month: int, + raw_name: str, + short_name: str, + **extras, +): + """Read HDF file and build iris cube with auxiliary data for special variables.""" + f = SD(os.path.join(in_dir, filepath), SDC.READ) + + data_obj = f.select(raw_name) + data = data_obj.get().astype(np.float32) + + 
# mask fill values + if hasattr(data_obj, "fill_value"): + data[data == data_obj.fill_value] = np.nan + + # mask values outside valid range + if hasattr(data_obj, "valid_range"): + valid_range = data_obj.valid_range + logger.debug(f"Masking {short_name} data outside range {valid_range}") + data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan + + # apply scale factor and add offset if they exist + if hasattr(data_obj, "scale_factor"): + scale_factor = data_obj.scale_factor + logger.debug(f"Scaling {short_name} data by {scale_factor}") + data *= scale_factor + if hasattr(data_obj, "add_offset"): + add_offset = data_obj.add_offset + logger.debug(f"Offsetting {short_name} data by {add_offset}") + data += add_offset + + # for certain variables, we need to read additional datasets to compute the final variable + + # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp + if short_name in ["clwvi"]: + cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfin = cfirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset + cif[cif > 0.999] = np.nan + + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + # liquid fraction estimated assuming random overlap: + # ctot = 1 - (1 - cif) * (1 - lif) + # --> lif = 1 - (1 - ctot) / (1 - cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif[lif < 0] = 0 + + # ice water path + cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") + cwpimm = cwpimm_obj.get().astype(np.float32) + cwpimm_fv = cwpimm_obj._FillValue + cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values + + # convert iwp in-cloud to gridbox avg using masked cirrus fraction + iwp = cwpimm * cif + + # liquid water path + # convert lwp in-cloud value to 
grid-box avg + lwp = data * lif + + data = lwp + iwp + + # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average + elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: + # cirrus fraction (0-1) + cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfswirFm = cfswirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset + cf[cf > 0.999] = np.nan + + # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average + if short_name in ["lwpStderr"]: + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + cif = cf + cif[cf > 0.999] = np.nan + + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf[cf < 0] = 0 + + data *= cf # "grid-box average" lwp/iwp + + # mask all null values (nan, inf) + data = np.ma.masked_invalid(data) + + # Handle units - convert invalid units to '1' + units_str = data_obj.attributes().get("units", "1") + if units_str in ["none", "None", ""]: + units_str = "1" + + # Create time coordinate for the start of the month, with bounds covering the whole month + time_point = datetime( + year=year, month=month, day=15 + ) # use 15th as representative time for the month + time_bounds_lower = datetime(year=year, month=month, day=1) + time_bounds_upper = datetime( + year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 + ) - timedelta( + days=1 + ) # end of month is the day before the first day of the next month + + # Convert time to days since 1850-01-01 + time_units = datetime(1850, 1, 1) + delta_1850 = (time_point - time_units).days + # After creating time delta, add bounds: + delta_bounds_lower = (time_bounds_lower - time_units).days + delta_bounds_upper = (time_bounds_upper - time_units).days + + 
time_coord = iris.coords.DimCoord( + points=[delta_1850], + standard_name="time", + units="days since 1850-01-01 00:00:00", + bounds=[[delta_bounds_lower, delta_bounds_upper]], + ) + data = data[np.newaxis, :, :] # Add time dimension at start + + lats = f.select("YDim").get().astype(np.float64) + lat_coord = iris.coords.DimCoord( + lats, + standard_name="latitude", + units="degrees_north", + bounds=[ + ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid + lats[i] - 0.5 * np.abs(lats[1] - lats[0]), + lats[i] + 0.5 * np.abs(lats[1] - lats[0]), + ) + for i in range(len(lats)) + ], + ) + + lons = f.select("XDim").get().astype(np.float64) + lon_coord = iris.coords.DimCoord( + lons, + standard_name="longitude", + units="degrees_east", + bounds=[ + ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid + lons[i] - 0.5 * np.abs(lons[1] - lons[0]), + lons[i] + 0.5 * np.abs(lons[1] - lons[0]), + ) + for i in range(len(lons)) + ], + ) + + if short_name == "od550aer": + # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) + wavelength_coord = iris.coords.AuxCoord( + [550], + standard_name="radiation_wavelength", + var_name="wavelength", + units="nm", + ) + aux_coords_and_dims = [ + ( + wavelength_coord, + None, + ), # wavelength is a scalar auxiliary coordinate + ] + else: + aux_coords_and_dims = ( + None # no auxiliary coordinates for other variables + ) + + cube = iris.cube.Cube( + data, + long_name=data_obj.attributes().get("long_name", raw_name), + var_name=short_name, + units=units_str, + dim_coords_and_dims=[ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ], + aux_coords_and_dims=aux_coords_and_dims, + ) + + return cube + + +def convert( + cube: iris.cube.Cube, + cmor_info, + attrs, +) -> iris.cube.Cube: + """Convert cube to CMOR standards based on data type and cmor_info.""" + + if attrs.get("reference") is None: + attrs["reference"] = 
cmor_info.attributes["reference"] + + cube.convert_units(cmor_info.units) + + if cube.coord("longitude").points.min() < 0: + # convert from [-180, 180] to [0, 360] + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) + nlon = len(cube.coord("longitude").points) + dim_lon = 2 + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) + + if np.diff(cube.coord("latitude").points)[0] < 0: + # convert [90,-90] to [-90,90] + cube.coord("latitude").points = cube.coord("latitude").points[::-1] + # flip the data + cube.data = cube.data[:, ::-1, :] # latitude is axis=1 + + utils.set_global_atts(cube, attrs) + utils.fix_var_metadata(cube, cmor_info) + utils.fix_dim_coordnames(cube) + + utils.fix_coords(cube) + + return cube + + +def _extract_variable( + filepaths: list, + var_d, + attrs, + cmor_table, + cfg, + in_dir: str, + out_dir: str, + year: int, +) -> None: + """Extract variable data from input files, convert to CMOR standards, and save.""" + + # add mip to attributes for use in cmor table lookup and global attributes + attrs["mip"] = var_d["mip"] + + # get custom cmor table for the target project if specified + if var_d.get("project_id"): + cmor_table_b = CMOR_TABLES.get(var_d.get("project_id")) + cmor_info = cmor_table_b.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + else: + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + + # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table + if var_d.get("reference"): + attrs["reference"] = var_d.get("reference") + + if not cmor_table: + raise Exception( + f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. 
Valid options: {", ".join(list(CMOR_TABLES.keys()))}' + ) + + # check if cmor info was found for the variable, if not log an error and skip + if cmor_info is None: + logger.error( + f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" + ) + return + logger.debug( + f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" + ) + + logger.info( + f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" + ) + + cubes = iris.cube.CubeList() + for filepath in filepaths: + month = get_month_from_filepath(filepath, cfg) + logger.debug( + f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" + ) + cube = read_hdf(in_dir, filepath, year, month, **var_d) + + logger.debug( + f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" + ) + cubes.append(cube) + + if len(cubes) > 0: + cube = cubes.concatenate()[ + 0 + ] # concatenate along time dimension and take the resulting cube + else: + cube = cubes[0] + + cube = convert(cube, cmor_info, attrs) + + # add dataset_id to global attributes for provenance + cube_attrs = cube.attributes.globals + cube_attrs["dataset_id"] = attrs["dataset_id"] + logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") + + utils.save_variable( + cube, + var_d.get("short_name"), + out_dir, + cube_attrs, + unlimited_dimensions=["time"], + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for MODIS.""" + + cmor_table = cfg["cmor_table"] + glob_attrs = cfg["attributes"] + + # get relevant files + filepaths = collect_files(in_dir, cfg) + + # group by year + years_d = group_files_by_year(filepaths, cfg) + years = sorted(list(years_d.keys())) + + # iterate over the variables to be CMORized + for var, var_d in cfg["variables"].items(): + logger.info( + "CMORizing var %s from input set %s", var, var_d["raw_name"] + ) + var_d["short_name"] = var + logger.debug("\n" + 
format(var_d)) + + # cmorize by year, merging all months at the end + for year in years: + _extract_variable( + years_d[year], + var_d, + glob_attrs, + cmor_table, + cfg, + in_dir, + out_dir, + year, + ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py new file mode 100644 index 0000000000..d7e2b5f088 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py @@ -0,0 +1,447 @@ +"""ESMValTool CMORizer for MODIS terra data. + +Tier + Tier 3 + +Source + https://ladsweb.modaps.eosdis.nasa.gov/search/order + +Requirements + python hdf reader, https://pypi.org/project/pyhdf/ + +Modification history + 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl + + +""" + +import logging +import os +import re +from datetime import datetime, timedelta + +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES +from pyhdf.SD import SD, SDC + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def collect_files(in_dir: str, cfg) -> list: + """Compose input file list and download if missing.""" + + attrs = cfg["attributes"] + # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} + # in_dir = rootpath / Tier{tier}/{dataset} + in_dir = os.path.join(in_dir) + fname_pattern = ( + f"{attrs['version']}\\.A.*\\.hdf$" # e.g. 
"MOD08_M3.A*.hdf" + ) + + files_a = next(os.walk(in_dir), (None, None, []))[ + 2 + ] # get list of files in in_dir + files = [] + for filename in files_a: + if re.search(fname_pattern, filename): + files.append(filename) + + if len(files) == 0: + raise Exception("No files found", in_dir) + + return files + + +def get_year_from_filepath(filepath: str, cfg) -> int: + """get year from first four characters of last section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + year = int(basename[offset : offset + 4]) + return year + + +def get_month_from_filepath(filepath: str, cfg) -> int: + """get month from timerange section of basename""" + + attrs = cfg["attributes"] + basename = os.path.splitext(os.path.basename(filepath))[0] + offset = len( + f"{attrs['version']}.A" + ) # length of version string + dot + 'A' character + doy = int(basename[offset + 4 : offset + 7]) # day of year + year = int(basename[offset : offset + 4]) + # Convert day-of-year to month + date = datetime(year, 1, 1) + timedelta(days=doy - 1) + month = date.month + logger.debug( + f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" + ) + return month + + +def group_files_by_year(filepaths: list, cfg) -> dict: + """group filepaths by year using get_year_from_filepath function""" + years_D = dict() + for filename in filepaths: + year = get_year_from_filepath(filename, cfg) + + if year not in years_D: + years_D[year] = [filename] + else: + years_D[year].append(filename) + + return years_D + + +def read_hdf( + in_dir: str, + filepath: str, + year: int, + month: int, + raw_name: str, + short_name: str, + **extras, +): + """Read HDF file and build iris cube with auxiliary data for special variables.""" + f = SD(os.path.join(in_dir, filepath), SDC.READ) + + data_obj = f.select(raw_name) + data = data_obj.get().astype(np.float32) + + 
# mask fill values + if hasattr(data_obj, "fill_value"): + data[data == data_obj.fill_value] = np.nan + + # mask values outside valid range + if hasattr(data_obj, "valid_range"): + valid_range = data_obj.valid_range + logger.debug(f"Masking {short_name} data outside range {valid_range}") + data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan + + # apply scale factor and add offset if they exist + if hasattr(data_obj, "scale_factor"): + scale_factor = data_obj.scale_factor + logger.debug(f"Scaling {short_name} data by {scale_factor}") + data *= scale_factor + if hasattr(data_obj, "add_offset"): + add_offset = data_obj.add_offset + logger.debug(f"Offsetting {short_name} data by {add_offset}") + data += add_offset + + # for certain variables, we need to read additional datasets to compute the final variable + + # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp + if short_name in ["clwvi"]: + cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfin = cfirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset + cif[cif > 0.999] = np.nan + + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + # liquid fraction estimated assuming random overlap: + # ctot = 1 - (1 - cif) * (1 - lif) + # --> lif = 1 - (1 - ctot) / (1 - cif) + lif = 1.0 - (1.0 - ctot) / (1.0 - cif) + lif[lif < 0] = 0 + + # ice water path + cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") + cwpimm = cwpimm_obj.get().astype(np.float32) + cwpimm_fv = cwpimm_obj._FillValue + cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values + + # convert iwp in-cloud to gridbox avg using masked cirrus fraction + iwp = cwpimm * cif + + # liquid water path + # convert lwp in-cloud value to 
grid-box avg + lwp = data * lif + + data = lwp + iwp + + # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average + elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: + # cirrus fraction (0-1) + cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") + cfswirFm = cfswirFm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset + cf[cf > 0.999] = np.nan + + # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average + if short_name in ["lwpStderr"]: + cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") + cfmm = cfmm_obj.get().astype(np.float32) + # unpack with scale_factor and add_offset + ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset + + cif = cf + cif[cf > 0.999] = np.nan + + cf = 1.0 - (1.0 - ctot) / (1.0 - cif) + cf[cf < 0] = 0 + + data *= cf # "grid-box average" lwp/iwp + + # mask all null values (nan, inf) + data = np.ma.masked_invalid(data) + + # Handle units - convert invalid units to '1' + units_str = data_obj.attributes().get("units", "1") + if units_str in ["none", "None", ""]: + units_str = "1" + + # Create time coordinate for the start of the month, with bounds covering the whole month + time_point = datetime( + year=year, month=month, day=15 + ) # use 15th as representative time for the month + time_bounds_lower = datetime(year=year, month=month, day=1) + time_bounds_upper = datetime( + year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 + ) - timedelta( + days=1 + ) # end of month is the day before the first day of the next month + + # Convert time to days since 1850-01-01 + time_units = datetime(1850, 1, 1) + delta_1850 = (time_point - time_units).days + # After creating time delta, add bounds: + delta_bounds_lower = (time_bounds_lower - time_units).days + delta_bounds_upper = (time_bounds_upper - time_units).days + + 
time_coord = iris.coords.DimCoord( + points=[delta_1850], + standard_name="time", + units="days since 1850-01-01 00:00:00", + bounds=[[delta_bounds_lower, delta_bounds_upper]], + ) + data = data[np.newaxis, :, :] # Add time dimension at start + + lats = f.select("YDim").get().astype(np.float64) + lat_coord = iris.coords.DimCoord( + lats, + standard_name="latitude", + units="degrees_north", + bounds=[ + ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid + lats[i] - 0.5 * np.abs(lats[1] - lats[0]), + lats[i] + 0.5 * np.abs(lats[1] - lats[0]), + ) + for i in range(len(lats)) + ], + ) + + lons = f.select("XDim").get().astype(np.float64) + lon_coord = iris.coords.DimCoord( + lons, + standard_name="longitude", + units="degrees_east", + bounds=[ + ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid + lons[i] - 0.5 * np.abs(lons[1] - lons[0]), + lons[i] + 0.5 * np.abs(lons[1] - lons[0]), + ) + for i in range(len(lons)) + ], + ) + + if short_name == "od550aer": + # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) + wavelength_coord = iris.coords.AuxCoord( + [550], + standard_name="radiation_wavelength", + var_name="wavelength", + units="nm", + ) + aux_coords_and_dims = [ + ( + wavelength_coord, + None, + ), # wavelength is a scalar auxiliary coordinate + ] + else: + aux_coords_and_dims = ( + None # no auxiliary coordinates for other variables + ) + + cube = iris.cube.Cube( + data, + long_name=data_obj.attributes().get("long_name", raw_name), + var_name=short_name, + units=units_str, + dim_coords_and_dims=[ + (time_coord, 0), + (lat_coord, 1), + (lon_coord, 2), + ], + aux_coords_and_dims=aux_coords_and_dims, + ) + + return cube + + +def convert( + cube: iris.cube.Cube, + cmor_info, + attrs, +) -> iris.cube.Cube: + """Convert cube to CMOR standards based on data type and cmor_info.""" + + if attrs.get("reference") is None: + attrs["reference"] = 
cmor_info.attributes["reference"] + + cube.convert_units(cmor_info.units) + + if cube.coord("longitude").points.min() < 0: + # convert from [-180, 180] to [0, 360] + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) + nlon = len(cube.coord("longitude").points) + dim_lon = 2 + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) + + if np.diff(cube.coord("latitude").points)[0] < 0: + # convert [90,-90] to [-90,90] + cube.coord("latitude").points = cube.coord("latitude").points[::-1] + # flip the data + cube.data = cube.data[:, ::-1, :] # latitude is axis=1 + + utils.set_global_atts(cube, attrs) + utils.fix_var_metadata(cube, cmor_info) + utils.fix_dim_coordnames(cube) + + utils.fix_coords(cube) + + return cube + + +def _extract_variable( + filepaths: list, + var_d, + attrs, + cmor_table, + cfg, + in_dir: str, + out_dir: str, + year: int, +) -> None: + """Extract variable data from input files, convert to CMOR standards, and save.""" + + # add mip to attributes for use in cmor table lookup and global attributes + attrs["mip"] = var_d["mip"] + + # get custom cmor table for the target project if specified + if var_d.get("project_id"): + cmor_table_b = CMOR_TABLES.get(var_d.get("project_id")) + cmor_info = cmor_table_b.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + else: + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) + + # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table + if var_d.get("reference"): + attrs["reference"] = var_d.get("reference") + + if not cmor_table: + raise Exception( + f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. 
Valid options: {", ".join(list(CMOR_TABLES.keys()))}' + ) + + # check if cmor info was found for the variable, if not log an error and skip + if cmor_info is None: + logger.error( + f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" + ) + return + logger.debug( + f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" + ) + + logger.info( + f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" + ) + + cubes = iris.cube.CubeList() + for filepath in filepaths: + month = get_month_from_filepath(filepath, cfg) + logger.debug( + f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" + ) + cube = read_hdf(in_dir, filepath, year, month, **var_d) + + logger.debug( + f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" + ) + cubes.append(cube) + + if len(cubes) > 0: + cube = cubes.concatenate()[ + 0 + ] # concatenate along time dimension and take the resulting cube + else: + cube = cubes[0] + + cube = convert(cube, cmor_info, attrs) + + # add dataset_id to global attributes for provenance + cube_attrs = cube.attributes.globals + cube_attrs["dataset_id"] = attrs["dataset_id"] + logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") + + utils.save_variable( + cube, + var_d.get("short_name"), + out_dir, + cube_attrs, + unlimited_dimensions=["time"], + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for MODIS.""" + + cmor_table = cfg["cmor_table"] + glob_attrs = cfg["attributes"] + + # get relevant files + filepaths = collect_files(in_dir, cfg) + + # group by year + years_d = group_files_by_year(filepaths, cfg) + years = sorted(list(years_d.keys())) + + # iterate over the variables to be CMORized + for var, var_d in cfg["variables"].items(): + logger.info( + "CMORizing var %s from input set %s", var, var_d["raw_name"] + ) + var_d["short_name"] = var + logger.debug("\n" + 
format(var_d)) + + # cmorize by year, merging all months at the end + for year in years: + _extract_variable( + years_d[year], + var_d, + glob_attrs, + cmor_table, + cfg, + in_dir, + out_dir, + year, + ) diff --git a/pyproject.toml b/pyproject.toml index f7b907c2c6..28877fe61d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ dependencies = [ "packaging", "pandas", "progressbar2", + "pyhdf", "pyproj>=2.1", "pys2index", "python-dateutil", From 7702714619a59a589fc3e4981d4eee54d0363c0a Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Tue, 17 Feb 2026 19:24:22 +0000 Subject: [PATCH 02/13] Applied ruff format changes --- .../cmorizers/data/cmor_config/MISR.yml | 23 +++ .../data/formatters/datasets/misr.py | 143 ++++++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 esmvaltool/cmorizers/data/cmor_config/MISR.yml create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/misr.py diff --git a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml new file mode 100644 index 0000000000..c7348afa5d --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -0,0 +1,23 @@ +--- +# Global attributes of NetCDF file +attributes: + dataset_id: MISR + project_id: CMOR6 + tier: 2 + version: 'V0032' + modeling_realm: sat + source: 'ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/' + reference: 'misr' + comment: '' + files: 'MISR_AM1_CGAS_*_F15_0032.nc' +# Variables to CMORize +variables: + od550aer: + raw_name: Aerosol_Optical_Depth + mip: AERmon + abs550aer: + raw_name: Absorbing_Optical_Depth + mip: AERmon + # ae: + # raw_name: Angstrom_Exponent_550_860 + # mip: AERmon diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py new file mode 100644 index 0000000000..c07c314e3d --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -0,0 +1,143 @@ +"""ESMValTool CMORizer for MISR data. 
+ +Tier + Tier 2 + +Source + ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/ + +""" + +import copy +import datetime as dt +import logging +import os +from pathlib import Path + +import numpy as np +import xarray as xr +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +band550 = {"name": "green_558nm", "lambda": 558} + + +def _extract_variable(short_name, var, cfg, in_dir, out_dir): + attrs = copy.deepcopy(cfg["attributes"]) + attrs["mip"] = var["mip"] + ver = attrs["version"] + files = attrs["files"] + raw_var = var.get("raw_name", short_name) + + cmor_table = CMOR_TABLES[attrs["project_id"]] + cmor_info = cmor_table.get_variable(var["mip"], short_name) + + logger.info("CMORizing variable '%s' from file(s) '%s'", short_name, files) + + """Extract variable.""" + # load data + for filepath in Path(os.path.join(in_dir, ver)).glob(files): + xrds = xr.open_dataset(filepath, group="Aerosol_Parameter_Average") + xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ + raw_var + ] + + # change order of latitude and longitude coordinates + # xrvar = xrvar.transpose() + + # Add additional coordinates before converting to an iris cube, as this is easier with xarray + + # Time not present in source data, needs to be added manually + # Determine time from filename: + fileparts = str(filepath).split("_") + year = int(fileparts[-3]) + monthstr = fileparts[-4] + month = [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ].index(monthstr) + 1 + days_since_1850 = dt.date(year, month, 15) - dt.date(1850, 1, 1) + lb_since_1850 = dt.date(year, month, 1) - dt.date(1850, 1, 1) + if month == 12: + ub_since_1850 = dt.date(year + 1, 1, 1) - dt.date(1850, 1, 1) + else: + ub_since_1850 = dt.date(year, month + 1, 1) - dt.date(1850, 1, 1) + + xrvar = xrvar.assign_coords(time=days_since_1850.days) 
+ xrvar = xrvar.expand_dims("time", axis=2) + xrvar["time"].attrs["units"] = "days since 1850-01-01 00:00:00" + + if short_name in ["od550aer", "abs550aer"]: + xrvar = xrvar.assign_coords(radiation_wavelength=band550["lambda"]) + xrvar["radiation_wavelength"].attrs["units"] = "nm" + + cube = xrvar.to_iris() + + # Fix metadata + cube.coord("Geodetic Latitude").rename("latitude") + cube.coord("Geodetic Longitude").rename("longitude") + + # add time bounds + cube.coord("time").bounds = np.array( + [ub_since_1850.days, lb_since_1850.days] + ) + + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + utils.fix_dim_coordnames(cube) + + # When Dask tries to roll this cube, it fails because it can't chunk this properly + # So here we replicate the part of fix_coords that does that, except with numpy.roll + # instead of dask.roll. + # However, it seems that those last two lines had no effect on the results... + # cube.data has shape (360, 720, 1) i.e. (lat, lon, time) + # so the original code tried to roll the cube on the time axis + # I could hard-code the lon axis (like they did), but instead I try to autodetect it. + cube_coord = cube.coord("longitude") + logger.info("Fixing longitude...") + if cube_coord.ndim == 1: + if cube_coord.points[0] < 0.0 and cube_coord.points[-1] < 181.0: + cube_coord.points = cube_coord.points + 180.0 + cube.attributes["geospatial_lon_min"] = 0.0 + cube.attributes["geospatial_lon_max"] = 360.0 + nlon = len(cube_coord.points) + axis = cube.coords().index(cube_coord) + shift = nlon // 2 + cube.data = da.roll(cube.core_data(), shift, axis=axis) + + utils.fix_coords(cube) + + # fix the wavelength coordinate information. 
+ if short_name in ["od550aer", "abs550aer"]: + cube.coord("radiation_wavelength").var_name = "wavelength" + cube.coord("wavelength").standard_name = "radiation_wavelength" + + utils.set_global_atts(cube, attrs) + + # Save variable + utils.save_variable( + cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for MISR.""" + cfg.pop("cmor_table") + + for short_name, var in cfg["variables"].items(): + _extract_variable(short_name, var, cfg, in_dir, out_dir) From 3f94595855e71c0fd094e6af1bf639d99c0ad365 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Tue, 17 Feb 2026 22:14:00 +0000 Subject: [PATCH 03/13] ok --- esmvaltool/cmorizers/data/formatters/datasets/misr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py index c07c314e3d..09088611b5 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/misr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -6,6 +6,9 @@ Source ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/ +Modification history + + """ import copy From 2d1228c057ce84d8fea36556ec478e68a9d3b1d2 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Wed, 18 Feb 2026 10:08:51 +0000 Subject: [PATCH 04/13] 1 --- esmvaltool/cmorizers/data/cmor_config/MISR.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml index c7348afa5d..d57d12bc93 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MISR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -1,14 +1,13 @@ ---- # Global attributes of NetCDF file attributes: - dataset_id: MISR - project_id: CMOR6 + project_id: CMIP6 + dataset_id: MODIS-Terra + modeling_realm: atmos + type: sat tier: 2 version: 'V0032' - modeling_realm: sat source: 
'ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/' reference: 'misr' - comment: '' files: 'MISR_AM1_CGAS_*_F15_0032.nc' # Variables to CMORize variables: @@ -18,6 +17,6 @@ variables: abs550aer: raw_name: Absorbing_Optical_Depth mip: AERmon - # ae: - # raw_name: Angstrom_Exponent_550_860 - # mip: AERmon + ae: + raw_name: Angstrom_Exponent_550_860 + mip: AERmon From 2de95e9651cd0eac2f0800b3eabaa3b60bb5302d Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 19 Feb 2026 21:21:36 +0000 Subject: [PATCH 05/13] fixed problems running the cmorizer --- esmvaltool/cmorizers/data/cmor_config/MISR.yml | 9 +++------ esmvaltool/cmorizers/data/datasets.yml | 18 ++++++++++++++++++ .../cmorizers/data/formatters/datasets/misr.py | 13 +++++++++---- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml index d57d12bc93..596dc07bae 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MISR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -1,12 +1,12 @@ # Global attributes of NetCDF file attributes: project_id: CMIP6 - dataset_id: MODIS-Terra + dataset_id: MISR modeling_realm: atmos type: sat - tier: 2 + tier: 3 version: 'V0032' - source: 'ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/' + source: 'https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004' reference: 'misr' files: 'MISR_AM1_CGAS_*_F15_0032.nc' # Variables to CMORize @@ -17,6 +17,3 @@ variables: abs550aer: raw_name: Absorbing_Optical_Depth mip: AERmon - ae: - raw_name: Angstrom_Exponent_550_860 - mip: AERmon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 3d50453bb9..dd4c106014 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1003,6 +1003,24 @@ datasets: yearly granularity. Note that some (most) variables are on the goldsmr4 server, whereas others are on the goldsmr5 server. 
+ MISR: + tier: 3 + source: https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004 + last_access: 2026-02-18 + info: | + In Products: select "MISR", "Collection 6.1" and "L3 Atmosphere Product", + click on MISR_AM1_AS_L3_V4.1. + In Time: select from 2000-01-01 to today. + In Location: skip, the global domain will be applied. + In Files: select all. + Submit the order. + A registration is required to download the data. + + Caveats + clwvi and clivi data are in-cloud values whereas CMIP5 models provide + grid-box averages --> multiply MISR clwvi and clivi values with cloud + fraction as a first guess + MLS-AURA: tier: 3 source: https://disc.gsfc.nasa.gov/datasets/ML2RHI_004/summary https://disc.gsfc.nasa.gov/datasets/ML2T_004/summary diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py index 09088611b5..0a0a6ac6f1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/misr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -1,7 +1,7 @@ """ESMValTool CMORizer for MISR data. 
Tier - Tier 2 + Tier 3 Source ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/ @@ -39,11 +39,15 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): cmor_table = CMOR_TABLES[attrs["project_id"]] cmor_info = cmor_table.get_variable(var["mip"], short_name) - logger.info("CMORizing variable '%s' from file(s) '%s'", short_name, files) - """Extract variable.""" # load data - for filepath in Path(os.path.join(in_dir, ver)).glob(files): + logger.info( + "Loading data from file(s) '%s' in directory '%s' with version '%s'", + files, + in_dir, + ver, + ) + for filepath in Path(os.path.join(in_dir)).glob(files): xrds = xr.open_dataset(filepath, group="Aerosol_Parameter_Average") xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ raw_var @@ -133,6 +137,7 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): utils.set_global_atts(cube, attrs) # Save variable + logger.info(f"Saving Cube: {cube}, in directory: {out_dir}") utils.save_variable( cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] ) From 4cd85751023efb8bcea51badbdf019cb213575c9 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 19 Feb 2026 21:47:58 +0000 Subject: [PATCH 06/13] Bunched cmorized data files by year --- .../data/formatters/datasets/misr.py | 227 +++++++++++------- 1 file changed, 135 insertions(+), 92 deletions(-) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py index 0a0a6ac6f1..2d7b3b767d 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/misr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -12,13 +12,14 @@ """ import copy -import datetime as dt +import datetime import logging import os from pathlib import Path +import iris import numpy as np -import xarray as xr +import xarray from dask import array as da from esmvalcore.cmor.table import CMOR_TABLES @@ -29,6 +30,18 @@ band550 = {"name": "green_558nm", "lambda": 558} +def bunch_files_by_year(in_dir, 
files_pattern): + """Group files by year.""" + files_by_year = {} + for filepath in Path(os.path.join(in_dir)).glob(files_pattern): + fileparts = str(filepath).split("_") + year = int(fileparts[-3]) + if year not in files_by_year: + files_by_year[year] = [] + files_by_year[year].append(filepath) + return files_by_year + + def _extract_variable(short_name, var, cfg, in_dir, out_dir): attrs = copy.deepcopy(cfg["attributes"]) attrs["mip"] = var["mip"] @@ -47,97 +60,127 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): in_dir, ver, ) - for filepath in Path(os.path.join(in_dir)).glob(files): - xrds = xr.open_dataset(filepath, group="Aerosol_Parameter_Average") - xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ - raw_var - ] - - # change order of latitude and longitude coordinates - # xrvar = xrvar.transpose() - - # Add additional coordinates before converting to an iris cube, as this is easier with xarray - - # Time not present in source data, needs to be added manually - # Determine time from filename: - fileparts = str(filepath).split("_") - year = int(fileparts[-3]) - monthstr = fileparts[-4] - month = [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", - ].index(monthstr) + 1 - days_since_1850 = dt.date(year, month, 15) - dt.date(1850, 1, 1) - lb_since_1850 = dt.date(year, month, 1) - dt.date(1850, 1, 1) - if month == 12: - ub_since_1850 = dt.date(year + 1, 1, 1) - dt.date(1850, 1, 1) + files_by_year = bunch_files_by_year(in_dir, files) + for year, filepaths in files_by_year.items(): + cubes = iris.cube.CubeList() + for filepath in filepaths: + xrds = xarray.open_dataset( + filepath, group="Aerosol_Parameter_Average" + ) + xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ + raw_var + ] + + # change order of latitude and longitude coordinates + # xrvar = xrvar.transpose() + + # Add additional coordinates before converting to an iris cube, as this is easier with 
xarray + + # Time not present in source data, needs to be added manually + # Determine time from filename: + fileparts = str(filepath).split("_") + year = int(fileparts[-3]) + monthstr = fileparts[-4] + month = [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ].index(monthstr) + 1 + days_since_1850 = datetime.date(year, month, 15) - datetime.date( + 1850, 1, 1 + ) + lb_since_1850 = datetime.date(year, month, 1) - datetime.date( + 1850, 1, 1 + ) + if month == 12: + ub_since_1850 = ( + datetime.date(year + 1, 1, 1) + - datetime.date(1850, 1, 1) + - datetime.timedelta(days=1) + ) + else: + ub_since_1850 = ( + datetime.date(year, month + 1, 1) + - datetime.date(1850, 1, 1) + - datetime.timedelta(days=1) + ) + + xrvar = xrvar.assign_coords(time=days_since_1850.days) + xrvar = xrvar.expand_dims("time", axis=2) + xrvar["time"].attrs["units"] = "days since 1850-01-01 00:00:00" + + if short_name in ["od550aer", "abs550aer"]: + xrvar = xrvar.assign_coords( + radiation_wavelength=band550["lambda"] + ) + xrvar["radiation_wavelength"].attrs["units"] = "nm" + + cube = xrvar.to_iris() + + # Fix metadata + cube.coord("Geodetic Latitude").rename("latitude") + cube.coord("Geodetic Longitude").rename("longitude") + + # add time bounds + cube.coord("time").bounds = np.array( + [ub_since_1850.days, lb_since_1850.days] + ) + cube.coord("time").units = "days since 1850-01-01 00:00:00" + + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + utils.fix_dim_coordnames(cube) + + # When Dask tries to roll this cube, it fails because it can't chunk this properly + # So here we replicate the part of fix_coords that does that, except with numpy.roll + # instead of dask.roll. + # However, it seems that those last two lines had no effect on the results... + # cube.data has shape (360, 720, 1) i.e. 
(lat, lon, time) + # so the original code tried to roll the cube on the time axis + # I could hard-code the lon axis (like they did), but instead I try to autodetect it. + cube_coord = cube.coord("longitude") + if cube_coord.ndim == 1: + if ( + cube_coord.points[0] < 0.0 + and cube_coord.points[-1] < 181.0 + ): + cube_coord.points = cube_coord.points + 180.0 + cube.attributes["geospatial_lon_min"] = 0.0 + cube.attributes["geospatial_lon_max"] = 360.0 + nlon = len(cube_coord.points) + axis = cube.coords().index(cube_coord) + shift = nlon // 2 + cube.data = da.roll(cube.core_data(), shift, axis=axis) + + utils.fix_coords(cube) + + # fix the wavelength coordinate information. + if short_name in ["od550aer", "abs550aer"]: + cube.coord("radiation_wavelength").var_name = "wavelength" + cube.coord("wavelength").standard_name = "radiation_wavelength" + + utils.set_global_atts(cube, attrs) + + # Save variable + logger.debug(f"Saving Cube: {cube}, in directory: {out_dir}") + cubes.append(cube) + + if len(cubes) > 0: + cube = cubes.concatenate()[ + 0 + ] # concatenate along time dimension and take the resulting cube else: - ub_since_1850 = dt.date(year, month + 1, 1) - dt.date(1850, 1, 1) - - xrvar = xrvar.assign_coords(time=days_since_1850.days) - xrvar = xrvar.expand_dims("time", axis=2) - xrvar["time"].attrs["units"] = "days since 1850-01-01 00:00:00" - - if short_name in ["od550aer", "abs550aer"]: - xrvar = xrvar.assign_coords(radiation_wavelength=band550["lambda"]) - xrvar["radiation_wavelength"].attrs["units"] = "nm" - - cube = xrvar.to_iris() - - # Fix metadata - cube.coord("Geodetic Latitude").rename("latitude") - cube.coord("Geodetic Longitude").rename("longitude") - - # add time bounds - cube.coord("time").bounds = np.array( - [ub_since_1850.days, lb_since_1850.days] - ) - - utils.fix_var_metadata(cube, cmor_info) - utils.set_global_atts(cube, attrs) - - utils.fix_dim_coordnames(cube) - - # When Dask tries to roll this cube, it fails because it can't chunk this 
properly - # So here we replicate the part of fix_coords that does that, except with numpy.roll - # instead of dask.roll. - # However, it seems that those last two lines had no effect on the results... - # cube.data has shape (360, 720, 1) i.e. (lat, lon, time) - # so the original code tried to roll the cube on the time axis - # I could hard-code the lon axis (like they did), but instead I try to autodetect it. - cube_coord = cube.coord("longitude") - logger.info("Fixing longitude...") - if cube_coord.ndim == 1: - if cube_coord.points[0] < 0.0 and cube_coord.points[-1] < 181.0: - cube_coord.points = cube_coord.points + 180.0 - cube.attributes["geospatial_lon_min"] = 0.0 - cube.attributes["geospatial_lon_max"] = 360.0 - nlon = len(cube_coord.points) - axis = cube.coords().index(cube_coord) - shift = nlon // 2 - cube.data = da.roll(cube.core_data(), shift, axis=axis) - - utils.fix_coords(cube) - - # fix the wavelength coordinate information. - if short_name in ["od550aer", "abs550aer"]: - cube.coord("radiation_wavelength").var_name = "wavelength" - cube.coord("wavelength").standard_name = "radiation_wavelength" - - utils.set_global_atts(cube, attrs) - - # Save variable - logger.info(f"Saving Cube: {cube}, in directory: {out_dir}") + cube = cubes[0] utils.save_variable( cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] ) From b4022043b239f47351c83abb315b0d35965c8143 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 19 Feb 2026 22:26:10 +0000 Subject: [PATCH 07/13] reverted esmvaltool/cmorizers/data/formatters/datasets/misr.py --- .../cmorizers/data/cmor_config/MISR.yml | 9 +- esmvaltool/cmorizers/data/datasets.yml | 18 -- .../data/formatters/datasets/misr.py | 227 +++++++----------- 3 files changed, 98 insertions(+), 156 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml index 596dc07bae..d57d12bc93 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MISR.yml +++ 
b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -1,12 +1,12 @@ # Global attributes of NetCDF file attributes: project_id: CMIP6 - dataset_id: MISR + dataset_id: MODIS-Terra modeling_realm: atmos type: sat - tier: 3 + tier: 2 version: 'V0032' - source: 'https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004' + source: 'ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/' reference: 'misr' files: 'MISR_AM1_CGAS_*_F15_0032.nc' # Variables to CMORize @@ -17,3 +17,6 @@ variables: abs550aer: raw_name: Absorbing_Optical_Depth mip: AERmon + ae: + raw_name: Angstrom_Exponent_550_860 + mip: AERmon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index dd4c106014..3d50453bb9 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1003,24 +1003,6 @@ datasets: yearly granularity. Note that some (most) variables are on the goldsmr4 server, whereas others are on the goldsmr5 server. - MISR: - tier: 3 - source: https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004 - last_access: 2026-02-18 - info: | - In Products: select "MISR", "Collection 6.1" and "L3 Atmosphere Product", - click on MISR_AM1_AS_L3_V4.1. - In Time: select from 2000-01-01 to today. - In Location: skip, the global domain will be applied. - In Files: select all. - Submit the order. - A registration is required to download the data. 
- - Caveats - clwvi and clivi data are in-cloud values whereas CMIP5 models provide - grid-box averages --> multiply MISR clwvi and clivi values with cloud - fraction as a first guess - MLS-AURA: tier: 3 source: https://disc.gsfc.nasa.gov/datasets/ML2RHI_004/summary https://disc.gsfc.nasa.gov/datasets/ML2T_004/summary diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py index 2d7b3b767d..0a0a6ac6f1 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/misr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -12,14 +12,13 @@ """ import copy -import datetime +import datetime as dt import logging import os from pathlib import Path -import iris import numpy as np -import xarray +import xarray as xr from dask import array as da from esmvalcore.cmor.table import CMOR_TABLES @@ -30,18 +29,6 @@ band550 = {"name": "green_558nm", "lambda": 558} -def bunch_files_by_year(in_dir, files_pattern): - """Group files by year.""" - files_by_year = {} - for filepath in Path(os.path.join(in_dir)).glob(files_pattern): - fileparts = str(filepath).split("_") - year = int(fileparts[-3]) - if year not in files_by_year: - files_by_year[year] = [] - files_by_year[year].append(filepath) - return files_by_year - - def _extract_variable(short_name, var, cfg, in_dir, out_dir): attrs = copy.deepcopy(cfg["attributes"]) attrs["mip"] = var["mip"] @@ -60,127 +47,97 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): in_dir, ver, ) - files_by_year = bunch_files_by_year(in_dir, files) - for year, filepaths in files_by_year.items(): - cubes = iris.cube.CubeList() - for filepath in filepaths: - xrds = xarray.open_dataset( - filepath, group="Aerosol_Parameter_Average" - ) - xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ - raw_var - ] - - # change order of latitude and longitude coordinates - # xrvar = xrvar.transpose() - - # Add additional coordinates before converting to an iris cube, 
as this is easier with xarray - - # Time not present in source data, needs to be added manually - # Determine time from filename: - fileparts = str(filepath).split("_") - year = int(fileparts[-3]) - monthstr = fileparts[-4] - month = [ - "JAN", - "FEB", - "MAR", - "APR", - "MAY", - "JUN", - "JUL", - "AUG", - "SEP", - "OCT", - "NOV", - "DEC", - ].index(monthstr) + 1 - days_since_1850 = datetime.date(year, month, 15) - datetime.date( - 1850, 1, 1 - ) - lb_since_1850 = datetime.date(year, month, 1) - datetime.date( - 1850, 1, 1 - ) - if month == 12: - ub_since_1850 = ( - datetime.date(year + 1, 1, 1) - - datetime.date(1850, 1, 1) - - datetime.timedelta(days=1) - ) - else: - ub_since_1850 = ( - datetime.date(year, month + 1, 1) - - datetime.date(1850, 1, 1) - - datetime.timedelta(days=1) - ) - - xrvar = xrvar.assign_coords(time=days_since_1850.days) - xrvar = xrvar.expand_dims("time", axis=2) - xrvar["time"].attrs["units"] = "days since 1850-01-01 00:00:00" - - if short_name in ["od550aer", "abs550aer"]: - xrvar = xrvar.assign_coords( - radiation_wavelength=band550["lambda"] - ) - xrvar["radiation_wavelength"].attrs["units"] = "nm" - - cube = xrvar.to_iris() - - # Fix metadata - cube.coord("Geodetic Latitude").rename("latitude") - cube.coord("Geodetic Longitude").rename("longitude") - - # add time bounds - cube.coord("time").bounds = np.array( - [ub_since_1850.days, lb_since_1850.days] - ) - cube.coord("time").units = "days since 1850-01-01 00:00:00" - - utils.fix_var_metadata(cube, cmor_info) - utils.set_global_atts(cube, attrs) - - utils.fix_dim_coordnames(cube) - - # When Dask tries to roll this cube, it fails because it can't chunk this properly - # So here we replicate the part of fix_coords that does that, except with numpy.roll - # instead of dask.roll. - # However, it seems that those last two lines had no effect on the results... - # cube.data has shape (360, 720, 1) i.e. 
(lat, lon, time) - # so the original code tried to roll the cube on the time axis - # I could hard-code the lon axis (like they did), but instead I try to autodetect it. - cube_coord = cube.coord("longitude") - if cube_coord.ndim == 1: - if ( - cube_coord.points[0] < 0.0 - and cube_coord.points[-1] < 181.0 - ): - cube_coord.points = cube_coord.points + 180.0 - cube.attributes["geospatial_lon_min"] = 0.0 - cube.attributes["geospatial_lon_max"] = 360.0 - nlon = len(cube_coord.points) - axis = cube.coords().index(cube_coord) - shift = nlon // 2 - cube.data = da.roll(cube.core_data(), shift, axis=axis) - - utils.fix_coords(cube) - - # fix the wavelength coordinate information. - if short_name in ["od550aer", "abs550aer"]: - cube.coord("radiation_wavelength").var_name = "wavelength" - cube.coord("wavelength").standard_name = "radiation_wavelength" - - utils.set_global_atts(cube, attrs) - - # Save variable - logger.debug(f"Saving Cube: {cube}, in directory: {out_dir}") - cubes.append(cube) - - if len(cubes) > 0: - cube = cubes.concatenate()[ - 0 - ] # concatenate along time dimension and take the resulting cube + for filepath in Path(os.path.join(in_dir)).glob(files): + xrds = xr.open_dataset(filepath, group="Aerosol_Parameter_Average") + xrvar = xrds.sel(Band=band550["name"], Optical_Depth_Range="all")[ + raw_var + ] + + # change order of latitude and longitude coordinates + # xrvar = xrvar.transpose() + + # Add additional coordinates before converting to an iris cube, as this is easier with xarray + + # Time not present in source data, needs to be added manually + # Determine time from filename: + fileparts = str(filepath).split("_") + year = int(fileparts[-3]) + monthstr = fileparts[-4] + month = [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ].index(monthstr) + 1 + days_since_1850 = dt.date(year, month, 15) - dt.date(1850, 1, 1) + lb_since_1850 = dt.date(year, month, 1) - dt.date(1850, 1, 1) + if month 
== 12: + ub_since_1850 = dt.date(year + 1, 1, 1) - dt.date(1850, 1, 1) else: - cube = cubes[0] + ub_since_1850 = dt.date(year, month + 1, 1) - dt.date(1850, 1, 1) + + xrvar = xrvar.assign_coords(time=days_since_1850.days) + xrvar = xrvar.expand_dims("time", axis=2) + xrvar["time"].attrs["units"] = "days since 1850-01-01 00:00:00" + + if short_name in ["od550aer", "abs550aer"]: + xrvar = xrvar.assign_coords(radiation_wavelength=band550["lambda"]) + xrvar["radiation_wavelength"].attrs["units"] = "nm" + + cube = xrvar.to_iris() + + # Fix metadata + cube.coord("Geodetic Latitude").rename("latitude") + cube.coord("Geodetic Longitude").rename("longitude") + + # add time bounds + cube.coord("time").bounds = np.array( + [ub_since_1850.days, lb_since_1850.days] + ) + + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + utils.fix_dim_coordnames(cube) + + # When Dask tries to roll this cube, it fails because it can't chunk this properly + # So here we replicate the part of fix_coords that does that, except with numpy.roll + # instead of dask.roll. + # However, it seems that those last two lines had no effect on the results... + # cube.data has shape (360, 720, 1) i.e. (lat, lon, time) + # so the original code tried to roll the cube on the time axis + # I could hard-code the lon axis (like they did), but instead I try to autodetect it. + cube_coord = cube.coord("longitude") + logger.info("Fixing longitude...") + if cube_coord.ndim == 1: + if cube_coord.points[0] < 0.0 and cube_coord.points[-1] < 181.0: + cube_coord.points = cube_coord.points + 180.0 + cube.attributes["geospatial_lon_min"] = 0.0 + cube.attributes["geospatial_lon_max"] = 360.0 + nlon = len(cube_coord.points) + axis = cube.coords().index(cube_coord) + shift = nlon // 2 + cube.data = da.roll(cube.core_data(), shift, axis=axis) + + utils.fix_coords(cube) + + # fix the wavelength coordinate information. 
+ if short_name in ["od550aer", "abs550aer"]: + cube.coord("radiation_wavelength").var_name = "wavelength" + cube.coord("wavelength").standard_name = "radiation_wavelength" + + utils.set_global_atts(cube, attrs) + + # Save variable + logger.info(f"Saving Cube: {cube}, in directory: {out_dir}") utils.save_variable( cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] ) From ffd2efe18988b1657e563ec72792891cecf7292b Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Tue, 24 Feb 2026 19:27:42 +0000 Subject: [PATCH 08/13] added exspected data for cmip6 standard --- .../cmorizers/data/cmor_config/MISR.yml | 9 ++---- esmvaltool/cmorizers/data/datasets.yml | 10 +++++++ .../data/formatters/datasets/misr.py | 29 ++++++++++++++----- esmvaltool/config-references.yml | 4 +++ esmvaltool/references/misr.bibtex | 11 +++++++ 5 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 esmvaltool/references/misr.bibtex diff --git a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml index d57d12bc93..596dc07bae 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MISR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -1,12 +1,12 @@ # Global attributes of NetCDF file attributes: project_id: CMIP6 - dataset_id: MODIS-Terra + dataset_id: MISR modeling_realm: atmos type: sat - tier: 2 + tier: 3 version: 'V0032' - source: 'ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/' + source: 'https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004' reference: 'misr' files: 'MISR_AM1_CGAS_*_F15_0032.nc' # Variables to CMORize @@ -17,6 +17,3 @@ variables: abs550aer: raw_name: Absorbing_Optical_Depth mip: AERmon - ae: - raw_name: Angstrom_Exponent_550_860 - mip: AERmon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 3d50453bb9..7b727f1831 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1030,6 +1030,16 @@ datasets: info: | Download 
the file MPI_MOBO-DIC_2004-2019_v2.nc + MISR: + tier: 3 + source: https://asdc.larc.nasa.gov/project/MISR/MIL3MAEN_004 + last_access: 2026-02-17 + info: | + To obtain the data set you will need to go to the Data Distribution + section of the link, create an account/sign in on the earthdata.nasa.gov website + and then follow the instructions for downloading the data. + The data is currently freely available, but a registration is required. + MODIS: tier: 3 source: https://ladsweb.modaps.eosdis.nasa.gov/search/order diff --git a/esmvaltool/cmorizers/data/formatters/datasets/misr.py b/esmvaltool/cmorizers/data/formatters/datasets/misr.py index 0a0a6ac6f1..c72a37d3fb 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/misr.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/misr.py @@ -5,10 +5,6 @@ Source ftp://l5ftl01.larc.nasa.gov/MISR/MIL3MAEN.004/ - -Modification history - - """ import copy @@ -41,7 +37,7 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): """Extract variable.""" # load data - logger.info( + logger.debug( "Loading data from file(s) '%s' in directory '%s' with version '%s'", files, in_dir, @@ -116,7 +112,7 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): # so the original code tried to roll the cube on the time axis # I could hard-code the lon axis (like they did), but instead I try to autodetect it. 
cube_coord = cube.coord("longitude") - logger.info("Fixing longitude...") + logger.debug("Fixing longitude...") if cube_coord.ndim == 1: if cube_coord.points[0] < 0.0 and cube_coord.points[-1] < 181.0: cube_coord.points = cube_coord.points + 180.0 @@ -127,6 +123,24 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): shift = nlon // 2 cube.data = da.roll(cube.core_data(), shift, axis=axis) + if np.diff(cube.coord("latitude").points)[0] < 0: + # convert [90,-90] to [-90,90] + cube.coord("latitude").points = cube.coord("latitude").points[::-1] + # flip the data + cube.data = cube.data[:, ::-1, :] # latitude is axis=1 + + lat_bounds = [] + for lat in cube.coord("latitude").points: + lat_bounds.append([lat - 0.25, lat + 0.25]) + lat_bounds = np.array(lat_bounds) + cube.coord("latitude").bounds = lat_bounds.reshape(-1, 2) + + lon_bounds = [] + for lon in cube.coord("longitude").points: + lon_bounds.append([lon - 0.25, lon + 0.25]) + lon_bounds = np.array(lon_bounds) + cube.coord("longitude").bounds = lon_bounds.reshape(-1, 2) + utils.fix_coords(cube) # fix the wavelength coordinate information. 
@@ -137,7 +151,7 @@ def _extract_variable(short_name, var, cfg, in_dir, out_dir): utils.set_global_atts(cube, attrs) # Save variable - logger.info(f"Saving Cube: {cube}, in directory: {out_dir}") + logger.debug(f"Saving Cube: {cube}, in directory: {out_dir}") utils.save_variable( cube, short_name, out_dir, attrs, unlimited_dimensions=["time"] ) @@ -148,4 +162,5 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): cfg.pop("cmor_table") for short_name, var in cfg["variables"].items(): + logger.info(f"CMORizing variable '{short_name}'") _extract_variable(short_name, var, cfg, in_dir, out_dir) diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml index 939696123f..f0aed75a6d 100644 --- a/esmvaltool/config-references.yml +++ b/esmvaltool/config-references.yml @@ -229,6 +229,10 @@ authors: name: Winterstein, Franziska institute: DLR, Germany orcid: https://orcid.org/0000-0002-2406-4936 + fruttarol_noah: + name: Fruttarol, Noah + institute: CCCma, ECCC, Canada + github: noahfruttarol fuckar_neven: name: Fuckar, Neven institute: BSC, Spain diff --git a/esmvaltool/references/misr.bibtex b/esmvaltool/references/misr.bibtex new file mode 100644 index 0000000000..577d498694 --- /dev/null +++ b/esmvaltool/references/misr.bibtex @@ -0,0 +1,11 @@ +@misc{misr, + doi = {10.5067/Terra/MISR/MIL3MAEN_L3.004}, + url = {https://doi.org/10.5067/Terra/MISR/MIL3MAEN_L3.004}, + publisher = {NASA Langley Atmospheric Science Data Center DAAC}, + title = {MISR Level 3 Component Global Aerosol product in netCDF format covering a month V004}, + author = {NASA/LARC/SD/ASDC}, + date = {2008-09-26}, + year = 2008, + month = 9, + day = 26, +} From 7037ee76f553883f9e73517e6cbb718fa7d12369 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 5 Mar 2026 21:05:07 +0000 Subject: [PATCH 09/13] fixed project_id --- esmvaltool/cmorizers/data/cmor_config/MISR.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/esmvaltool/cmorizers/data/cmor_config/MISR.yml b/esmvaltool/cmorizers/data/cmor_config/MISR.yml index 596dc07bae..351cfbfede 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MISR.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MISR.yml @@ -1,6 +1,6 @@ # Global attributes of NetCDF file attributes: - project_id: CMIP6 + project_id: OBS6 dataset_id: MISR modeling_realm: atmos type: sat From 0a19731f7347b9445e5e9220415294768558e1a1 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 12 Mar 2026 17:45:18 +0000 Subject: [PATCH 10/13] Minor changes and added reference --- .../cmorizers/data/cmor_config/MODIS-Aqua.yml | 6 ++--- .../data/cmor_config/MODIS-Terra.yml | 6 ++--- esmvaltool/cmorizers/data/datasets.yml | 22 +++++++++++++++++-- .../data/formatters/datasets/modis_aqua.py | 14 ++++-------- .../data/formatters/datasets/modis_terra.py | 14 ++++-------- esmvaltool/config-references.yml | 4 ++++ 6 files changed, 36 insertions(+), 30 deletions(-) diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml index 29329712c9..812e750d09 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml @@ -30,14 +30,12 @@ variables: frequency: mon lwpStderr: raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty - mip: Amon + mip: custom frequency: mon - project_id: custom iwpStderr: raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty - mip: Amon + mip: custom frequency: mon - project_id: custom od550aer: raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean mip: AERmon diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml index 94a3a686cc..e30f90835f 100644 --- a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml +++ b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml @@ -30,14 +30,12 @@ variables: frequency: mon lwpStderr: raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty 
- mip: Amon + mip: custom frequency: mon - project_id: custom iwpStderr: raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty - mip: Amon + mip: custom frequency: mon - project_id: custom od550aer: raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean mip: AERmon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 3d50453bb9..6c7613a20d 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1030,10 +1030,10 @@ datasets: info: | Download the file MPI_MOBO-DIC_2004-2019_v2.nc - MODIS: + MODIS-Aqua: tier: 3 source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - last_access: 2019-02-09 + last_access: 2026-01-30 info: | In Products: select "MODIS Aqua", "Collection 6.1" and "L3 Atmosphere Product", click on MYD08_M3. @@ -1048,6 +1048,24 @@ datasets: grid-box averages --> multiply MODIS clwvi and clivi values with cloud fraction as a first guess + MODIS-Terra: + tier: 3 + source: https://ladsweb.modaps.eosdis.nasa.gov/search/order + last_access: 2026-01-30 + info: | + In Products: select "MODIS Terra", "Collection 6.1" and + "L3 Atmosphere Product", click on MOD08_M3. + In Time: select from 2000-01-01 to today. + In Location: skip, the global domain will be applied. + In Files: select all. + Submit the order. + A registration is required to download the data. 
+ + Caveats + clwvi and clivi data are in-cloud values whereas CMIP5 models provide + grid-box averages --> multiply MODIS clwvi and clivi values with cloud + fraction as a first guess + MTE: tier: 3 source: http://www.bgc-jena.mpg.de/geodb/BGI/Home diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py index f43bdc8d2b..80859e31f3 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py @@ -342,16 +342,10 @@ def _extract_variable( # add mip to attributes for use in cmor table lookup and global attributes attrs["mip"] = var_d["mip"] - # get custom cmor table for the target project if specified - if var_d.get("project_id"): - cmor_table_b = CMOR_TABLES.get(var_d.get("project_id")) - cmor_info = cmor_table_b.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) - else: - cmor_info = cmor_table.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) + # get cmor table for variable + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table if var_d.get("reference"): diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py index d7e2b5f088..53e942be64 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py @@ -342,16 +342,10 @@ def _extract_variable( # add mip to attributes for use in cmor table lookup and global attributes attrs["mip"] = var_d["mip"] - # get custom cmor table for the target project if specified - if var_d.get("project_id"): - cmor_table_b = CMOR_TABLES.get(var_d.get("project_id")) - cmor_info = cmor_table_b.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) - else: - 
cmor_info = cmor_table.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) + # get the cmor table for the variable + cmor_info = cmor_table.get_variable( + var_d.get("mip"), var_d.get("short_name") + ) # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table if var_d.get("reference"): diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml index 939696123f..f0aed75a6d 100644 --- a/esmvaltool/config-references.yml +++ b/esmvaltool/config-references.yml @@ -229,6 +229,10 @@ authors: name: Winterstein, Franziska institute: DLR, Germany orcid: https://orcid.org/0000-0002-2406-4936 + fruttarol_noah: + name: Fruttarol, Noah + institute: CCCma, ECCC, Canada + github: noahfruttarol fuckar_neven: name: Fuckar, Neven institute: BSC, Spain From 0a8f443856e4cf5803a48e279298f81d1ff86e1c Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Thu, 12 Mar 2026 18:05:24 +0000 Subject: [PATCH 11/13] added MISR to check recipe --- esmvaltool/recipes/examples/recipe_check_obs.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index ea34444d64..9f9a826ae2 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -796,6 +796,19 @@ diagnostics: scripts: null + MISR: + description: MISR check + variables: + od550aer: + mip: AERmon + abs550aer: + mip: AERmon + additional_datasets: + - {dataset: MISR, project: OBS, mip: AERmon, tier: 3, + type: sat, version: v0032, start_year: 2000, end_year: 2020} + scripts: null + + MOBO-DIC-MPIM: description: MOBO-DIC-MPIM check variables: From 7c9d2a28bcaee7da3b124221ce3087f2eaafd86b Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Mon, 16 Mar 2026 16:40:40 +0000 Subject: [PATCH 12/13] added dataset to ESMValTool/doc/sphinx/source/input.rst --- doc/sphinx/source/input.rst | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 23b791149c..906dd2af27 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -424,6 +424,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MLS-AURA* [#t3]_ | hur, hurStderr (day) | 3 | Python | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| MISR | od550aer, abs550aer (AERmon) | 3 | Python | ++----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MOBO-DIC-MPIM | dissic (Omon) | 2 | Python | +----------------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MOBO-DIC2004-2019 | dissic (Omon) | 2 | Python | From 24529572135b4f9e95407ea7661096c52f422816 Mon Sep 17 00:00:00 2001 From: Noah Fruttarol Date: Tue, 21 Apr 2026 11:18:37 -0700 Subject: [PATCH 13/13] Revert changes made by mistake. 
--- .../cmorizers/data/cmor_config/MODIS-Aqua.yml | 47 -- .../data/cmor_config/MODIS-Terra.yml | 47 -- esmvaltool/cmorizers/data/datasets.yml | 23 +- .../data/formatters/datasets/_modis_aqua.ncl | 253 ---------- .../data/formatters/datasets/_modis_terra.ncl | 253 ---------- .../data/formatters/datasets/modis_aqua.py | 441 ------------------ .../data/formatters/datasets/modis_terra.py | 441 ------------------ pyproject.toml | 1 - 8 files changed, 2 insertions(+), 1504 deletions(-) delete mode 100644 esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml delete mode 100644 esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py delete mode 100644 esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml deleted file mode 100644 index 812e750d09..0000000000 --- a/esmvaltool/cmorizers/data/cmor_config/MODIS-Aqua.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -# Author: 2026-01-29 Kristi Webb - -# Common global attributes for Cmorizer output -attributes: - - project_id: CMIP6 - dataset_id: MODIS-Aqua - modeling_realm: atmos - type: sat - tier: 3 - version: MYD08_M3 - source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - reference: modis1 - - filename_fmt: "MYD08_M3.A*.hdf" - -variables: - clwvi: - raw_name: Cloud_Water_Path_Liquid_Mean_Mean - mip: Amon - frequency: mon - clivi: - raw_name: Cloud_Water_Path_Ice_Mean_Mean - mip: Amon - frequency: mon - clt: - raw_name: Cloud_Fraction_Mean_Mean - mip: Amon - frequency: mon - lwpStderr: - raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty - mip: custom - frequency: mon - iwpStderr: - raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty - mip: custom - frequency: mon - 
od550aer: - raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean - mip: AERmon - frequency: mon - reference: modis2 - reffclwtop: - raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean - mip: AERmon - frequency: mon diff --git a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml b/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml deleted file mode 100644 index e30f90835f..0000000000 --- a/esmvaltool/cmorizers/data/cmor_config/MODIS-Terra.yml +++ /dev/null @@ -1,47 +0,0 @@ ---- -# Author: 2026-01-29 Kristi Webb - -# Common global attributes for Cmorizer output -attributes: - - project_id: CMIP6 - dataset_id: MODIS-Terra - modeling_realm: atmos - type: sat - tier: 3 - version: MOD08_M3 - source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - reference: modis1 - - filename_fmt: "MOD08_M3.A*.hdf" - -variables: - clwvi: - raw_name: Cloud_Water_Path_Liquid_Mean_Mean - mip: Amon - frequency: mon - clivi: - raw_name: Cloud_Water_Path_Ice_Mean_Mean - mip: Amon - frequency: mon - clt: - raw_name: Cloud_Fraction_Mean_Mean - mip: Amon - frequency: mon - lwpStderr: - raw_name: Cloud_Water_Path_Liquid_Mean_Uncertainty - mip: custom - frequency: mon - iwpStderr: - raw_name: Cloud_Water_Path_Ice_Mean_Uncertainty - mip: custom - frequency: mon - od550aer: - raw_name: AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean - mip: AERmon - frequency: mon - reference: modis2 - reffclwtop: - raw_name: Cloud_Effective_Radius_Liquid_Mean_Mean - mip: AERmon - frequency: mon diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index a4aa17cb99..4d4852d8a6 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1092,11 +1092,10 @@ datasets: and then follow the instructions for downloading the data. The data is currently freely available, but a registration is required. 
- MODIS-Aqua: - + MODIS: tier: 3 source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - last_access: 2026-01-30 + last_access: 2019-02-09 info: | In Products: select "MODIS Aqua", "Collection 6.1" and "L3 Atmosphere Product", click on MYD08_M3. @@ -1111,24 +1110,6 @@ datasets: grid-box averages --> multiply MODIS clwvi and clivi values with cloud fraction as a first guess - MODIS-Terra: - tier: 3 - source: https://ladsweb.modaps.eosdis.nasa.gov/search/order - last_access: 2026-01-30 - info: | - In Products: select "MODIS Terra", "Collection 6.1" and - "L3 Atmosphere Product", click on MOD08_M3. - In Time: select from 2000-01-01 to today. - In Location: skip, the global domain will be applied. - In Files: select all. - Submit the order. - A registration is required to download the data. - - Caveats - clwvi and clivi data are in-cloud values whereas CMIP5 models provide - grid-box averages --> multiply MODIS clwvi and clivi values with cloud - fraction as a first guess - MTE: tier: 3 source: http://www.bgc-jena.mpg.de/geodb/BGI/Home diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl deleted file mode 100644 index f1829bd6b6..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl +++ /dev/null @@ -1,253 +0,0 @@ -; ############################################################################# -; ESMValTool CMORizer for MODIS data -; ############################################################################# -; -; Tier -; Tier 3: restricted dataset. -; -; Source -; https://ladsweb.modaps.eosdis.nasa.gov/search/order -; -; Last access -; 20190209 -; -; Download and processing instructions -; In Products: select "MODIS Aqua", "Collection 6.1" and -; "L3 Atmosphere Product", click on MYD08_M3. -; In Time: select from 2000-01-01 to today. -; In Location: skip, the global domain will be applied. -; In Files: select all. -; Submit the order. 
-; A registration is required to download the data. -; -; Caveats -; clwvi and clivi data are in-cloud values whereas CMIP5 models provide -; grid-box averages --> multiply MODIS clwvi and clivi values with cloud -; fraction as a first guess -; -; Modification history -; 20180209-righi_mattia: fixed bug in lwpStderr. -; 20180209-hassler_birgit: adapted to v2. -; 20180810-righi_mattia: fix minor calendar issue. -; 20180806-righi_mattia: code cleaning. -; 20170116-lauer_axel: using cirrus fraction to gridbox averages. -; 20160408-lauer_axel: added processing of uncertainties. -; 20151118-lauer_axel: bugfix: added unit conversion. -; 20150430-evaldsson_martin: written. -; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "modis_aqua.ncl" - - ; Source name - OBSNAME = "MODIS-Aqua" - - ; Tier - TIER = 3 - - ; Selected variable (standard name) - VAR = (/"clwvi", \ - "clivi", \ - "clt", \ - "lwpStderr", \ - "iwpStderr", \ - "od550aer", \ - "reffclwtop"/) - - ; Name in the raw data - NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ - "Cloud_Water_Path_Ice_Mean_Mean", \ - "Cloud_Fraction_Mean_Mean", \ - "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ - "Cloud_Water_Path_Ice_Mean_Uncertainty", \ - "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ - "Cloud_Effective_Radius_Liquid_Mean_Mean"/) - - ; MIP - MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) - - ; Version - VERSION = "MYD08_M3" - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_lwpStderr.dat", \ - "/custom/CMOR_iwpStderr.dat", \ - "/cmip5/Tables/CMIP5_aero", \ - "/cmip5/Tables/CMIP5_aero"/) - - ; Type - TYPE = "sat" - - ; Global attributes - 
SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" - REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ - "doi:10.1109/TGRS.2002.808301, 2003." - REF2 = "Levy et al., Atmos. Meas. Tech., " + \ - "doi:10.5194/amt-6-2989-2013, 2013." - COMMENT = "" - -end - -begin - - ; List of files - FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") - - do ff = 0, dimsizes(FILES) - 1 - - fin = addfile(FILES(ff), "r") - - ; Get time - infile = systemfunc("basename " + FILES(ff)) - date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) - year = toint(str_get_cols(tostring(date), 0, 3)) - month = toint(str_get_cols(tostring(date), 4, 5)) - dm = days_in_month(year, month) - - ; Loop over variables to fetch from input file - do vv = 0, dimsizes(VAR) - 1 - - invar = fin->$NAME(vv)$ - invar_fv = invar@_FillValue - invar_coords = invar - invar := tofloat(invar) - invar := where(invar.eq.tofloat(invar_fv), \ - default_fillvalue("float"), invar) - - ; Special case clwvi as the sum lwp + iwp - if (VAR(vv).eq."clwvi") then - if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then - error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") - end if - - ; Read cirrus fraction - ; cfin = fin->Cirrus_Fraction_SWIR_FMean - cfin = fin->Cirrus_Fraction_Infrared_FMean - cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - ; liquid fraction is estimated assuming random overlap, i.e. 
- ; ctot = 1 - (1 - cif) * (1 - lif) - ; --> lif = 1 - (1 - ctot) / (1 - cif) - delete(cfin) - cfin = fin->Cloud_Fraction_Mean_Mean - ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - cif = where(cif.gt.0.999, cif@_FillValue, cif) - lif = 1.0 - (1.0 - ctot) / (1.0 - cif) - lif = where(lif.lt.0, 0, lif) - tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path - tmpvar_fv = tmpvar@_FillValue - tmpvar := tofloat(tmpvar) - tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ - default_fillvalue("float"), \ - tmpvar) - tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg - invar = invar * lif ; convert lwp in-cloud value to grid-box avg - invar = invar + tmpvar ; clwvi = lwp + iwp - delete(tmpvar) - delete(lif) - delete(cif) - invar = 0.001 * invar ; [g/m2] --> [kg/m2] - end if - - ; lwp and iwp are in-cloud values - ; convert lwp/iwp to grid-box averages by multiplying with - ; average cloud fraction (not optimum but best we can do at the moment) - if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. 
VAR(vv))) then - - ; Read cirrus fraction (0-1) - ; cfin = fin->Cirrus_Fraction_SWIR_FMean - cfin = fin->Cirrus_Fraction_Infrared_FMean - cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - if (VAR(vv).eq."lwpStderr") then - cfin = fin->Cloud_Fraction_Mean_Mean - ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - cif = where(cf.gt.0.999, cf@_FillValue, cf) - cf = 1.0 - (1.0 - ctot) / (1.0 - cif) - cf = where(cf.lt.0, 0, cf) - delete(cif) - delete(ctot) - end if - invar = invar * cf ; ; "grid-box average" lwp/iwp - delete(cf) - invar = 0.001 * invar ; [g/m2] --> [kg/m2] - end if - - invar@_FillValue = default_fillvalue("float") - copy_VarCoords(invar_coords, invar) - if (isatt(invar_coords, "scale_factor")) then - invar = invar * tofloat(invar_coords@scale_factor) - end if - if (isatt(invar_coords, "add_offset")) then - invar = invar + tofloat(invar_coords@add_offset) - end if - - if (VAR(vv).eq."clt") then - invar = 100.0 * invar ; [1] --> [%] - end if - - ; Create output variable - lat = fin->YDim - lon = fin->XDim - output = new((/1, dimsizes(lat), dimsizes(lon)/), float) - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) - output&lat = lat - output&lon = lon - output(0, :, :) = (/invar/) - delete(invar) - delete(invar_coords) - - ; Format coordinates - format_coords(output, year + sprinti("%0.2i", month) + "01", \ - year + sprinti("%0.2i", month) + dm, FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - if (VAR(vv).ne."od550aer") then - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) - else - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) - end if - - ; Output file - DATESTR = \ - year + sprinti("%0.2i", month) + 
"-" + year + sprinti("%0.2i", month) - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - - end do - - end do - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl b/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl deleted file mode 100644 index 2e02498a88..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl +++ /dev/null @@ -1,253 +0,0 @@ -; ############################################################################# -; ESMValTool CMORizer for MODIS data -; ############################################################################# -; -; Tier -; Tier 3: restricted dataset. -; -; Source -; https://ladsweb.modaps.eosdis.nasa.gov/search/order -; -; Last access -; 20190209 -; -; Download and processing instructions -; In Products: select "MODIS Aqua", "Collection 6.1" and -; "L3 Atmosphere Product", click on MOD08_M3. -; In Time: select from 2000-01-01 to today. -; In Location: skip, the global domain will be applied. -; In Files: select all. -; Submit the order. -; A registration is required to download the data. -; -; Caveats -; clwvi and clivi data are in-cloud values whereas CMIP5 models provide -; grid-box averages --> multiply MODIS clwvi and clivi values with cloud -; fraction as a first guess -; -; Modification history -; 20180209-righi_mattia: fixed bug in lwpStderr. -; 20180209-hassler_birgit: adapted to v2. -; 20180810-righi_mattia: fix minor calendar issue. -; 20180806-righi_mattia: code cleaning. -; 20170116-lauer_axel: using cirrus fraction to gridbox averages. -; 20160408-lauer_axel: added processing of uncertainties. -; 20151118-lauer_axel: bugfix: added unit conversion. -; 20150430-evaldsson_martin: written. 
-; -; ############################################################################# -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") - -begin - - ; Script name (for logger) - DIAG_SCRIPT = "modis_terra.ncl" - - ; Source name - OBSNAME = "MODIS-Terra" - - ; Tier - TIER = 3 - - ; Selected variable (standard name) - VAR = (/"clwvi", \ - "clivi", \ - "clt", \ - "lwpStderr", \ - "iwpStderr", \ - "od550aer", \ - "reffclwtop"/) - - ; Name in the raw data - NAME = (/"Cloud_Water_Path_Liquid_Mean_Mean", \ - "Cloud_Water_Path_Ice_Mean_Mean", \ - "Cloud_Fraction_Mean_Mean", \ - "Cloud_Water_Path_Liquid_Mean_Uncertainty", \ - "Cloud_Water_Path_Ice_Mean_Uncertainty", \ - "AOD_550_Dark_Target_Deep_Blue_Combined_Mean_Mean", \ - "Cloud_Effective_Radius_Liquid_Mean_Mean"/) - - ; MIP - MIP = (/"Amon", "Amon", "Amon", "Amon", "Amon", "aero", "aero"/) - - ; Frequency - FREQ = (/"mon", "mon", "mon", "mon", "mon", "mon", "mon"/) - - ; Version - VERSION = "MOD08_M3" - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + \ - (/"/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/cmip5/Tables/CMIP5_Amon", \ - "/custom/CMOR_lwpStderr.dat", \ - "/custom/CMOR_iwpStderr.dat", \ - "/cmip5/Tables/CMIP5_aero", \ - "/cmip5/Tables/CMIP5_aero"/) - - ; Type - TYPE = "sat" - - ; Global attributes - SOURCE = "https://ladsweb.modaps.eosdis.nasa.gov/search/order" - REF1 = "Platnick et al., IEEE Trans. Geosci. Remote Sens., " + \ - "doi:10.1109/TGRS.2002.808301, 2003." - REF2 = "Levy et al., Atmos. Meas. Tech., " + \ - "doi:10.5194/amt-6-2989-2013, 2013." 
- COMMENT = "" - -end - -begin - - ; List of files - FILES = systemfunc("ls -1 " + input_dir_path + VERSION + ".A*.hdf") - - do ff = 0, dimsizes(FILES) - 1 - - fin = addfile(FILES(ff), "r") - - ; Get time - infile = systemfunc("basename " + FILES(ff)) - date = yyyyddd_to_yyyymmdd(toint(str_get_cols(infile, 10, 16))) - year = toint(str_get_cols(tostring(date), 0, 3)) - month = toint(str_get_cols(tostring(date), 4, 5)) - dm = days_in_month(year, month) - - ; Loop over variables to fetch from input file - do vv = 0, dimsizes(VAR) - 1 - - invar = fin->$NAME(vv)$ - invar_fv = invar@_FillValue - invar_coords = invar - invar := tofloat(invar) - invar := where(invar.eq.tofloat(invar_fv), \ - default_fillvalue("float"), invar) - - ; Special case clwvi as the sum lwp + iwp - if (VAR(vv).eq."clwvi") then - if (NAME(vv).ne."Cloud_Water_Path_Liquid_Mean_Mean") then - error_msg("f", DIAG_SCRIPT, "", "cannot calculate clwvi") - end if - - ; Read cirrus fraction - ; cfin = fin->Cirrus_Fraction_SWIR_FMean - cfin = fin->Cirrus_Fraction_Infrared_FMean - cif = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - ; liquid fraction is estimated assuming random overlap, i.e. 
- ; ctot = 1 - (1 - cif) * (1 - lif) - ; --> lif = 1 - (1 - ctot) / (1 - cif) - delete(cfin) - cfin = fin->Cloud_Fraction_Mean_Mean - ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - cif = where(cif.gt.0.999, cif@_FillValue, cif) - lif = 1.0 - (1.0 - ctot) / (1.0 - cif) - lif = where(lif.lt.0, 0, lif) - tmpvar = fin->Cloud_Water_Path_Ice_Mean_Mean ; read ice water path - tmpvar_fv = tmpvar@_FillValue - tmpvar := tofloat(tmpvar) - tmpvar := where(tmpvar.eq.tofloat(tmpvar_fv), \ - default_fillvalue("float"), \ - tmpvar) - tmpvar = tmpvar * cif ; convert iwp in-cloud value to gridbox avg - invar = invar * lif ; convert lwp in-cloud value to grid-box avg - invar = invar + tmpvar ; clwvi = lwp + iwp - delete(tmpvar) - delete(lif) - delete(cif) - invar = 0.001 * invar ; [g/m2] --> [kg/m2] - end if - - ; lwp and iwp are in-cloud values - ; convert lwp/iwp to grid-box averages by multiplying with - ; average cloud fraction (not optimum but best we can do at the moment) - if (any((/"clivi", "iwpStderr", "lwpStderr"/) .eq. 
VAR(vv))) then - - ; Read cirrus fraction (0-1) - ; cfin = fin->Cirrus_Fraction_SWIR_FMean - cfin = fin->Cirrus_Fraction_Infrared_FMean - cf = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - if (VAR(vv).eq."lwpStderr") then - cfin = fin->Cloud_Fraction_Mean_Mean - ctot = tofloat(cfin * cfin@scale_factor + cfin@add_offset) - delete(cfin) - cif = where(cf.gt.0.999, cf@_FillValue, cf) - cf = 1.0 - (1.0 - ctot) / (1.0 - cif) - cf = where(cf.lt.0, 0, cf) - delete(cif) - delete(ctot) - end if - invar = invar * cf ; ; "grid-box average" lwp/iwp - delete(cf) - invar = 0.001 * invar ; [g/m2] --> [kg/m2] - end if - - invar@_FillValue = default_fillvalue("float") - copy_VarCoords(invar_coords, invar) - if (isatt(invar_coords, "scale_factor")) then - invar = invar * tofloat(invar_coords@scale_factor) - end if - if (isatt(invar_coords, "add_offset")) then - invar = invar + tofloat(invar_coords@add_offset) - end if - - if (VAR(vv).eq."clt") then - invar = 100.0 * invar ; [1] --> [%] - end if - - ; Create output variable - lat = fin->YDim - lon = fin->XDim - output = new((/1, dimsizes(lat), dimsizes(lon)/), float) - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - output&time = cd_inv_calendar(year, month, 15, 0, 0, 0, TUNITS, 0) - output&lat = lat - output&lon = lon - output(0, :, :) = (/invar/) - delete(invar) - delete(invar_coords) - - ; Format coordinates - format_coords(output, year + sprinti("%0.2i", month) + "01", \ - year + sprinti("%0.2i", month) + dm, FREQ(vv)) - - ; Set variable attributes - tmp = format_variable(output, VAR(vv), CMOR_TABLE(vv)) - delete(output) - output = tmp - delete(tmp) - - ; Calculate coordinate bounds - bounds = guess_coord_bounds(output, FREQ(vv)) - - ; Set global attributes - if (VAR(vv).ne."od550aer") then - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF1, COMMENT) - else - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF2, COMMENT) - end if - - ; Output file - DATESTR = \ - year + sprinti("%0.2i", month) + 
"-" + year + sprinti("%0.2i", month) - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, str_sub_str(VERSION, "_", "-"), \ - MIP(vv), VAR(vv), DATESTR/), "_") + ".nc" - - ; Write variable - write_nc(fout, VAR(vv), output, bounds, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - - end do - - end do - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py deleted file mode 100644 index 80859e31f3..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/modis_aqua.py +++ /dev/null @@ -1,441 +0,0 @@ -"""ESMValTool CMORizer for MODIS aqua data. - -Tier - Tier 3 - -Source - https://ladsweb.modaps.eosdis.nasa.gov/search/order - -Requirements - python hdf reader, https://pypi.org/project/pyhdf/ - -Modification history - 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found in at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_aqua.ncl - - -""" - -import logging -import os -import re -from datetime import datetime, timedelta - -import iris -import numpy as np -from dask import array as da -from esmvalcore.cmor.table import CMOR_TABLES -from pyhdf.SD import SD, SDC - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - - -def collect_files(in_dir: str, cfg) -> list: - """Compose input file list and download if missing.""" - - attrs = cfg["attributes"] - # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} - # in_dir = rootpath / Tier{tier}/{dataset} - in_dir = os.path.join(in_dir) - fname_pattern = ( - f"{attrs['version']}\\.A.*\\.hdf$" # e.g. 
"MYD08_M3.A*.hdf" - ) - - files_a = next(os.walk(in_dir), (None, None, []))[ - 2 - ] # get list of files in in_dir - files = [] - for filename in files_a: - if re.search(fname_pattern, filename): - files.append(filename) - - if len(files) == 0: - raise Exception("No files found", in_dir) - - return files - - -def get_year_from_filepath(filepath: str, cfg) -> int: - """get year from first four characters of last section of basename""" - - attrs = cfg["attributes"] - basename = os.path.splitext(os.path.basename(filepath))[0] - offset = len( - f"{attrs['version']}.A" - ) # length of version string + dot + 'A' character - year = int(basename[offset : offset + 4]) - return year - - -def get_month_from_filepath(filepath: str, cfg) -> int: - """get month from timerange section of basename""" - - attrs = cfg["attributes"] - basename = os.path.splitext(os.path.basename(filepath))[0] - offset = len( - f"{attrs['version']}.A" - ) # length of version string + dot + 'A' character - doy = int(basename[offset + 4 : offset + 7]) # day of year - year = int(basename[offset : offset + 4]) - # Convert day-of-year to month - date = datetime(year, 1, 1) + timedelta(days=doy - 1) - month = date.month - logger.debug( - f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" - ) - return month - - -def group_files_by_year(filepaths: list, cfg) -> dict: - """group filepaths by year using get_year_from_filepath function""" - years_D = dict() - for filename in filepaths: - year = get_year_from_filepath(filename, cfg) - - if year not in years_D: - years_D[year] = [filename] - else: - years_D[year].append(filename) - - return years_D - - -def read_hdf( - in_dir: str, - filepath: str, - year: int, - month: int, - raw_name: str, - short_name: str, - **extras, -): - """Read HDF file and build iris cube with auxiliary data for special variables.""" - f = SD(os.path.join(in_dir, filepath), SDC.READ) - - data_obj = f.select(raw_name) - data = data_obj.get().astype(np.float32) - - 
# mask fill values - if hasattr(data_obj, "fill_value"): - data[data == data_obj.fill_value] = np.nan - - # mask values outside valid range - if hasattr(data_obj, "valid_range"): - valid_range = data_obj.valid_range - logger.debug(f"Masking {short_name} data outside range {valid_range}") - data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan - - # apply scale factor and add offset if they exist - if hasattr(data_obj, "scale_factor"): - scale_factor = data_obj.scale_factor - logger.debug(f"Scaling {short_name} data by {scale_factor}") - data *= scale_factor - if hasattr(data_obj, "add_offset"): - add_offset = data_obj.add_offset - logger.debug(f"Offsetting {short_name} data by {add_offset}") - data += add_offset - - # for certain variables, we need to read additional datasets to compute the final variable - - # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp - if short_name in ["clwvi"]: - cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") - cfin = cfirFm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset - cif[cif > 0.999] = np.nan - - cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") - cfmm = cfmm_obj.get().astype(np.float32) - ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset - - # liquid fraction estimated assuming random overlap: - # ctot = 1 - (1 - cif) * (1 - lif) - # --> lif = 1 - (1 - ctot) / (1 - cif) - lif = 1.0 - (1.0 - ctot) / (1.0 - cif) - lif[lif < 0] = 0 - - # ice water path - cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") - cwpimm = cwpimm_obj.get().astype(np.float32) - cwpimm_fv = cwpimm_obj._FillValue - cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values - - # convert iwp in-cloud to gridbox avg using masked cirrus fraction - iwp = cwpimm * cif - - # liquid water path - # convert lwp in-cloud value to 
grid-box avg - lwp = data * lif - - data = lwp + iwp - - # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average - elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: - # cirrus fraction (0-1) - cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") - cfswirFm = cfswirFm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset - cf[cf > 0.999] = np.nan - - # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average - if short_name in ["lwpStderr"]: - cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") - cfmm = cfmm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset - - cif = cf - cif[cf > 0.999] = np.nan - - cf = 1.0 - (1.0 - ctot) / (1.0 - cif) - cf[cf < 0] = 0 - - data *= cf # "grid-box average" lwp/iwp - - # mask all null values (nan, inf) - data = np.ma.masked_invalid(data) - - # Handle units - convert invalid units to '1' - units_str = data_obj.attributes().get("units", "1") - if units_str in ["none", "None", ""]: - units_str = "1" - - # Create time coordinate for the start of the month, with bounds covering the whole month - time_point = datetime( - year=year, month=month, day=15 - ) # use 15th as representative time for the month - time_bounds_lower = datetime(year=year, month=month, day=1) - time_bounds_upper = datetime( - year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 - ) - timedelta( - days=1 - ) # end of month is the day before the first day of the next month - - # Convert time to days since 1850-01-01 - time_units = datetime(1850, 1, 1) - delta_1850 = (time_point - time_units).days - # After creating time delta, add bounds: - delta_bounds_lower = (time_bounds_lower - time_units).days - delta_bounds_upper = (time_bounds_upper - time_units).days - - 
time_coord = iris.coords.DimCoord( - points=[delta_1850], - standard_name="time", - units="days since 1850-01-01 00:00:00", - bounds=[[delta_bounds_lower, delta_bounds_upper]], - ) - data = data[np.newaxis, :, :] # Add time dimension at start - - lats = f.select("YDim").get().astype(np.float64) - lat_coord = iris.coords.DimCoord( - lats, - standard_name="latitude", - units="degrees_north", - bounds=[ - ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid - lats[i] - 0.5 * np.abs(lats[1] - lats[0]), - lats[i] + 0.5 * np.abs(lats[1] - lats[0]), - ) - for i in range(len(lats)) - ], - ) - - lons = f.select("XDim").get().astype(np.float64) - lon_coord = iris.coords.DimCoord( - lons, - standard_name="longitude", - units="degrees_east", - bounds=[ - ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid - lons[i] - 0.5 * np.abs(lons[1] - lons[0]), - lons[i] + 0.5 * np.abs(lons[1] - lons[0]), - ) - for i in range(len(lons)) - ], - ) - - if short_name == "od550aer": - # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) - wavelength_coord = iris.coords.AuxCoord( - [550], - standard_name="radiation_wavelength", - var_name="wavelength", - units="nm", - ) - aux_coords_and_dims = [ - ( - wavelength_coord, - None, - ), # wavelength is a scalar auxiliary coordinate - ] - else: - aux_coords_and_dims = ( - None # no auxiliary coordinates for other variables - ) - - cube = iris.cube.Cube( - data, - long_name=data_obj.attributes().get("long_name", raw_name), - var_name=short_name, - units=units_str, - dim_coords_and_dims=[ - (time_coord, 0), - (lat_coord, 1), - (lon_coord, 2), - ], - aux_coords_and_dims=aux_coords_and_dims, - ) - - return cube - - -def convert( - cube: iris.cube.Cube, - cmor_info, - attrs, -) -> iris.cube.Cube: - """Convert cube to CMOR standards based on data type and cmor_info.""" - - if attrs.get("reference") is None: - attrs["reference"] = 
cmor_info.attributes["reference"] - - cube.convert_units(cmor_info.units) - - if cube.coord("longitude").points.min() < 0: - # convert from [-180, 180] to [0, 360] - cube.coord("longitude").points = cube.coord("longitude").points + 180.0 - # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) - nlon = len(cube.coord("longitude").points) - dim_lon = 2 - cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) - - if np.diff(cube.coord("latitude").points)[0] < 0: - # convert [90,-90] to [-90,90] - cube.coord("latitude").points = cube.coord("latitude").points[::-1] - # flip the data - cube.data = cube.data[:, ::-1, :] # latitude is axis=1 - - utils.set_global_atts(cube, attrs) - utils.fix_var_metadata(cube, cmor_info) - utils.fix_dim_coordnames(cube) - - utils.fix_coords(cube) - - return cube - - -def _extract_variable( - filepaths: list, - var_d, - attrs, - cmor_table, - cfg, - in_dir: str, - out_dir: str, - year: int, -) -> None: - """Extract variable data from input files, convert to CMOR standards, and save.""" - - # add mip to attributes for use in cmor table lookup and global attributes - attrs["mip"] = var_d["mip"] - - # get cmor table for variable - cmor_info = cmor_table.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) - - # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table - if var_d.get("reference"): - attrs["reference"] = var_d.get("reference") - - if not cmor_table: - raise Exception( - f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. 
Valid options: {", ".join(list(CMOR_TABLES.keys()))}' - ) - - # check if cmor info was found for the variable, if not log an error and skip - if cmor_info is None: - logger.error( - f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" - ) - return - logger.debug( - f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" - ) - - logger.info( - f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" - ) - - cubes = iris.cube.CubeList() - for filepath in filepaths: - month = get_month_from_filepath(filepath, cfg) - logger.debug( - f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" - ) - cube = read_hdf(in_dir, filepath, year, month, **var_d) - - logger.debug( - f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" - ) - cubes.append(cube) - - if len(cubes) > 0: - cube = cubes.concatenate()[ - 0 - ] # concatenate along time dimension and take the resulting cube - else: - cube = cubes[0] - - cube = convert(cube, cmor_info, attrs) - - # add dataset_id to global attributes for provenance - cube_attrs = cube.attributes.globals - cube_attrs["dataset_id"] = attrs["dataset_id"] - logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") - - utils.save_variable( - cube, - var_d.get("short_name"), - out_dir, - cube_attrs, - unlimited_dimensions=["time"], - ) - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Run CMORizer for MODIS.""" - - cmor_table = cfg["cmor_table"] - glob_attrs = cfg["attributes"] - - # get relevant files - filepaths = collect_files(in_dir, cfg) - - # group by year - years_d = group_files_by_year(filepaths, cfg) - years = sorted(list(years_d.keys())) - - # iterate over the variables to be CMORized - for var, var_d in cfg["variables"].items(): - logger.info( - "CMORizing var %s from input set %s", var, var_d["raw_name"] - ) - var_d["short_name"] = var - logger.debug("\n" + 
format(var_d)) - - # cmorize by year, merging all months at the end - for year in years: - _extract_variable( - years_d[year], - var_d, - glob_attrs, - cmor_table, - cfg, - in_dir, - out_dir, - year, - ) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py b/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py deleted file mode 100644 index 53e942be64..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/modis_terra.py +++ /dev/null @@ -1,441 +0,0 @@ -"""ESMValTool CMORizer for MODIS terra data. - -Tier - Tier 3 - -Source - https://ladsweb.modaps.eosdis.nasa.gov/search/order - -Requirements - python hdf reader, https://pypi.org/project/pyhdf/ - -Modification history - 20260216-Fruttarol_Noah: Written based on the original MODIS CMORization NCL script created by evaldsson_martin, which can be found in at ESMValTool/esmvaltool/cmorizers/data/formatters/datasets/_modis_terra.ncl - - -""" - -import logging -import os -import re -from datetime import datetime, timedelta - -import iris -import numpy as np -from dask import array as da -from esmvalcore.cmor.table import CMOR_TABLES -from pyhdf.SD import SD, SDC - -from esmvaltool.cmorizers.data import utilities as utils - -logger = logging.getLogger(__name__) - - -def collect_files(in_dir: str, cfg) -> list: - """Compose input file list and download if missing.""" - - attrs = cfg["attributes"] - # rootpath / Tier{tier}/{dataset}/{version}/{frequency}/{short_name} - # in_dir = rootpath / Tier{tier}/{dataset} - in_dir = os.path.join(in_dir) - fname_pattern = ( - f"{attrs['version']}\\.A.*\\.hdf$" # e.g. 
"MOD08_M3.A*.hdf" - ) - - files_a = next(os.walk(in_dir), (None, None, []))[ - 2 - ] # get list of files in in_dir - files = [] - for filename in files_a: - if re.search(fname_pattern, filename): - files.append(filename) - - if len(files) == 0: - raise Exception("No files found", in_dir) - - return files - - -def get_year_from_filepath(filepath: str, cfg) -> int: - """get year from first four characters of last section of basename""" - - attrs = cfg["attributes"] - basename = os.path.splitext(os.path.basename(filepath))[0] - offset = len( - f"{attrs['version']}.A" - ) # length of version string + dot + 'A' character - year = int(basename[offset : offset + 4]) - return year - - -def get_month_from_filepath(filepath: str, cfg) -> int: - """get month from timerange section of basename""" - - attrs = cfg["attributes"] - basename = os.path.splitext(os.path.basename(filepath))[0] - offset = len( - f"{attrs['version']}.A" - ) # length of version string + dot + 'A' character - doy = int(basename[offset + 4 : offset + 7]) # day of year - year = int(basename[offset : offset + 4]) - # Convert day-of-year to month - date = datetime(year, 1, 1) + timedelta(days=doy - 1) - month = date.month - logger.debug( - f"Extracted month {month}, doy {doy}, year {year} from filepath {filepath}" - ) - return month - - -def group_files_by_year(filepaths: list, cfg) -> dict: - """group filepaths by year using get_year_from_filepath function""" - years_D = dict() - for filename in filepaths: - year = get_year_from_filepath(filename, cfg) - - if year not in years_D: - years_D[year] = [filename] - else: - years_D[year].append(filename) - - return years_D - - -def read_hdf( - in_dir: str, - filepath: str, - year: int, - month: int, - raw_name: str, - short_name: str, - **extras, -): - """Read HDF file and build iris cube with auxiliary data for special variables.""" - f = SD(os.path.join(in_dir, filepath), SDC.READ) - - data_obj = f.select(raw_name) - data = data_obj.get().astype(np.float32) - - 
# mask fill values - if hasattr(data_obj, "fill_value"): - data[data == data_obj.fill_value] = np.nan - - # mask values outside valid range - if hasattr(data_obj, "valid_range"): - valid_range = data_obj.valid_range - logger.debug(f"Masking {short_name} data outside range {valid_range}") - data[(data < valid_range[0]) | (data > valid_range[1])] = np.nan - - # apply scale factor and add offset if they exist - if hasattr(data_obj, "scale_factor"): - scale_factor = data_obj.scale_factor - logger.debug(f"Scaling {short_name} data by {scale_factor}") - data *= scale_factor - if hasattr(data_obj, "add_offset"): - add_offset = data_obj.add_offset - logger.debug(f"Offsetting {short_name} data by {add_offset}") - data += add_offset - - # for certain variables, we need to read additional datasets to compute the final variable - - # liquid water path in-cloud, need to use cirrus fraction and cloud fraction to estimate liquid fraction, then multiply by in-cloud lwp to get grid-box average lwp - if short_name in ["clwvi"]: - cfirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") - cfin = cfirFm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - cif = (cfin * cfirFm_obj.scale_factor) + cfirFm_obj.add_offset - cif[cif > 0.999] = np.nan - - cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") - cfmm = cfmm_obj.get().astype(np.float32) - ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset - - # liquid fraction estimated assuming random overlap: - # ctot = 1 - (1 - cif) * (1 - lif) - # --> lif = 1 - (1 - ctot) / (1 - cif) - lif = 1.0 - (1.0 - ctot) / (1.0 - cif) - lif[lif < 0] = 0 - - # ice water path - cwpimm_obj = f.select("Cloud_Water_Path_Ice_Mean_Mean") - cwpimm = cwpimm_obj.get().astype(np.float32) - cwpimm_fv = cwpimm_obj._FillValue - cwpimm[cwpimm == cwpimm_fv] = np.nan # mask all fill values - - # convert iwp in-cloud to gridbox avg using masked cirrus fraction - iwp = cwpimm * cif - - # liquid water path - # convert lwp in-cloud value to 
grid-box avg - lwp = data * lif - - data = lwp + iwp - - # these variables are all in-cloud values, so we need to multiply by cirrus fraction to get grid-box average - elif short_name in ["clivi", "iwpStderr", "lwpStderr"]: - # cirrus fraction (0-1) - cfswirFm_obj = f.select("Cirrus_Fraction_Infrared_FMean") - cfswirFm = cfswirFm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - cf = (cfswirFm * cfswirFm_obj.scale_factor) + cfswirFm_obj.add_offset - cf[cf > 0.999] = np.nan - - # lwpStderr is only for liquid water path, so we want to use the liquid fraction to convert to grid-box average - if short_name in ["lwpStderr"]: - cfmm_obj = f.select("Cloud_Fraction_Mean_Mean") - cfmm = cfmm_obj.get().astype(np.float32) - # unpack with scale_factor and add_offset - ctot = (cfmm * cfmm_obj.scale_factor) + cfmm_obj.add_offset - - cif = cf - cif[cf > 0.999] = np.nan - - cf = 1.0 - (1.0 - ctot) / (1.0 - cif) - cf[cf < 0] = 0 - - data *= cf # "grid-box average" lwp/iwp - - # mask all null values (nan, inf) - data = np.ma.masked_invalid(data) - - # Handle units - convert invalid units to '1' - units_str = data_obj.attributes().get("units", "1") - if units_str in ["none", "None", ""]: - units_str = "1" - - # Create time coordinate for the start of the month, with bounds covering the whole month - time_point = datetime( - year=year, month=month, day=15 - ) # use 15th as representative time for the month - time_bounds_lower = datetime(year=year, month=month, day=1) - time_bounds_upper = datetime( - year=year + (month == 12), month=month + 1 - (month == 12) * 12, day=1 - ) - timedelta( - days=1 - ) # end of month is the day before the first day of the next month - - # Convert time to days since 1850-01-01 - time_units = datetime(1850, 1, 1) - delta_1850 = (time_point - time_units).days - # After creating time delta, add bounds: - delta_bounds_lower = (time_bounds_lower - time_units).days - delta_bounds_upper = (time_bounds_upper - time_units).days - - 
time_coord = iris.coords.DimCoord( - points=[delta_1850], - standard_name="time", - units="days since 1850-01-01 00:00:00", - bounds=[[delta_bounds_lower, delta_bounds_upper]], - ) - data = data[np.newaxis, :, :] # Add time dimension at start - - lats = f.select("YDim").get().astype(np.float64) - lat_coord = iris.coords.DimCoord( - lats, - standard_name="latitude", - units="degrees_north", - bounds=[ - ( # calculate bounds as midpoint between adjacent latitudes, assuming regular grid - lats[i] - 0.5 * np.abs(lats[1] - lats[0]), - lats[i] + 0.5 * np.abs(lats[1] - lats[0]), - ) - for i in range(len(lats)) - ], - ) - - lons = f.select("XDim").get().astype(np.float64) - lon_coord = iris.coords.DimCoord( - lons, - standard_name="longitude", - units="degrees_east", - bounds=[ - ( # calculate bounds as midpoint between adjacent longitudes, assuming regular grid - lons[i] - 0.5 * np.abs(lons[1] - lons[0]), - lons[i] + 0.5 * np.abs(lons[1] - lons[0]), - ) - for i in range(len(lons)) - ], - ) - - if short_name == "od550aer": - # add auxiliary coordinate for wavelength (550 nm for this variable, which is AOD at 550 nm) - wavelength_coord = iris.coords.AuxCoord( - [550], - standard_name="radiation_wavelength", - var_name="wavelength", - units="nm", - ) - aux_coords_and_dims = [ - ( - wavelength_coord, - None, - ), # wavelength is a scalar auxiliary coordinate - ] - else: - aux_coords_and_dims = ( - None # no auxiliary coordinates for other variables - ) - - cube = iris.cube.Cube( - data, - long_name=data_obj.attributes().get("long_name", raw_name), - var_name=short_name, - units=units_str, - dim_coords_and_dims=[ - (time_coord, 0), - (lat_coord, 1), - (lon_coord, 2), - ], - aux_coords_and_dims=aux_coords_and_dims, - ) - - return cube - - -def convert( - cube: iris.cube.Cube, - cmor_info, - attrs, -) -> iris.cube.Cube: - """Convert cube to CMOR standards based on data type and cmor_info.""" - - if attrs.get("reference") is None: - attrs["reference"] = 
cmor_info.attributes["reference"] - - cube.convert_units(cmor_info.units) - - if cube.coord("longitude").points.min() < 0: - # convert from [-180, 180] to [0, 360] - cube.coord("longitude").points = cube.coord("longitude").points + 180.0 - # roll the data as part of the longitude conversion to maintain correct order (assuming regular grid and that longitude is the last dimension) - nlon = len(cube.coord("longitude").points) - dim_lon = 2 - cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=dim_lon) - - if np.diff(cube.coord("latitude").points)[0] < 0: - # convert [90,-90] to [-90,90] - cube.coord("latitude").points = cube.coord("latitude").points[::-1] - # flip the data - cube.data = cube.data[:, ::-1, :] # latitude is axis=1 - - utils.set_global_atts(cube, attrs) - utils.fix_var_metadata(cube, cmor_info) - utils.fix_dim_coordnames(cube) - - utils.fix_coords(cube) - - return cube - - -def _extract_variable( - filepaths: list, - var_d, - attrs, - cmor_table, - cfg, - in_dir: str, - out_dir: str, - year: int, -) -> None: - """Extract variable data from input files, convert to CMOR standards, and save.""" - - # add mip to attributes for use in cmor table lookup and global attributes - attrs["mip"] = var_d["mip"] - - # get the cmor table for the variable - cmor_info = cmor_table.get_variable( - var_d.get("mip"), var_d.get("short_name") - ) - - # if this variable has a reference specified in the config, use that, otherwise use the one from the cmor table - if var_d.get("reference"): - attrs["reference"] = var_d.get("reference") - - if not cmor_table: - raise Exception( - f'project_id "{var_d.get("project_id", "None")}" is invalid, no CMOR table found. 
Valid options: {", ".join(list(CMOR_TABLES.keys()))}' - ) - - # check if cmor info was found for the variable, if not log an error and skip - if cmor_info is None: - logger.error( - f"CMOR info not found for variable {var_d.get('short_name')} in mip {var_d.get('mip')}" - ) - return - logger.debug( - f"CMOR info for variable {var_d.get('short_name')}: {cmor_info}" - ) - - logger.info( - f"CMORizing variable {var_d.get('short_name')} from input set {var_d.get('raw_name')}" - ) - - cubes = iris.cube.CubeList() - for filepath in filepaths: - month = get_month_from_filepath(filepath, cfg) - logger.debug( - f"File: {filepath} -> var={var_d.get('short_name')} year={year}, month={month}" - ) - cube = read_hdf(in_dir, filepath, year, month, **var_d) - - logger.debug( - f"Read cube for variable {var_d.get('short_name')}: {cube}, with attributes: {cube.attributes}" - ) - cubes.append(cube) - - if len(cubes) > 0: - cube = cubes.concatenate()[ - 0 - ] # concatenate along time dimension and take the resulting cube - else: - cube = cubes[0] - - cube = convert(cube, cmor_info, attrs) - - # add dataset_id to global attributes for provenance - cube_attrs = cube.attributes.globals - cube_attrs["dataset_id"] = attrs["dataset_id"] - logger.debug(f"Saving variable {cube} with attributes: {cube_attrs}") - - utils.save_variable( - cube, - var_d.get("short_name"), - out_dir, - cube_attrs, - unlimited_dimensions=["time"], - ) - - -def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): - """Run CMORizer for MODIS.""" - - cmor_table = cfg["cmor_table"] - glob_attrs = cfg["attributes"] - - # get relevant files - filepaths = collect_files(in_dir, cfg) - - # group by year - years_d = group_files_by_year(filepaths, cfg) - years = sorted(list(years_d.keys())) - - # iterate over the variables to be CMORized - for var, var_d in cfg["variables"].items(): - logger.info( - "CMORizing var %s from input set %s", var, var_d["raw_name"] - ) - var_d["short_name"] = var - logger.debug("\n" + 
format(var_d)) - - # cmorize by year, merging all months at the end - for year in years: - _extract_variable( - years_d[year], - var_d, - glob_attrs, - cmor_table, - cfg, - in_dir, - out_dir, - year, - ) diff --git a/pyproject.toml b/pyproject.toml index 9dcb1d5aca..c31d67b3e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,6 @@ dependencies = [ "packaging", "pandas", "progressbar2", - "pyhdf", "pyproj>=2.1", "pys2index", "python-dateutil",