MeteoSwiss · muellch · May 13, 2026 · Jun 15, 2026 · Jun 17, 2026 · Jun 19, 2026
diff --git a/engine/fof_compare.py b/engine/fof_compare.py
@@ -17,7 +17,7 @@
 from util.fof_utils import (
     get_log_file_name,
 )
-from util.log_handler import initialize_detailed_logger, logger
+from util.log_handler import initialize_detailed_logger, log_dataframe, logger
 from util.utils import FileInfo
 
 
@@ -49,8 +49,12 @@ def fof_compare(file1, file2, fof_types, tolerance, rules):
         file1_path = file1.format(fof_type=fof_type)
         file2_path = file2.format(fof_type=fof_type)
 
-        n_rows_file1 = xr.open_dataset(file1_path).sizes["d_body"]
-        n_rows_file2 = xr.open_dataset(file2_path).sizes["d_body"]
+        # Use the real observation count (n_body), not the allocated/padded
+        # dimension (d_body). For radar fof files d_body > n_body, and the
+        # comparison operates on the n_body real rows after padding is stripped
+        # in split_feedback_dataset.
+        n_rows_file1 = xr.open_dataset(file1_path).attrs["n_body"]
+        n_rows_file2 = xr.open_dataset(file2_path).attrs["n_body"]
 
         if n_rows_file1 != n_rows_file2:
             raise ValueError("Files have different numbers of lines!")
@@ -82,11 +86,12 @@ def fof_compare(file1, file2, fof_types, tolerance, rules):
                         "DETAILS", log_level="DEBUG", log_file=log_file_name
                     )
 
-                    detailed_logger.info(
-                        "Differences, veri_data outside of tolerance range"
+                    log_dataframe(
+                        detailed_logger,
+                        "Differences of veri_data",
+                        err,
                     )
-                    detailed_logger.info(err)
-                    detailed_logger.info(tol)
+                    log_dataframe(detailed_logger, "Tolerances of veri_data", tol)
 
 
 if __name__ == "__main__":

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -305,6 +305,77 @@ def fixture_sample_dataset_fof():
     return data
 
 
+@pytest.fixture(name="sample_dataset_radar_fof")
+def fixture_sample_dataset_radar_fof():
+    """Radar FOF dataset mirroring the structure of real emvorado radar fof files:
+    - d_hdr > n_hdr and d_body > n_body: the arrays are over-allocated and the
+      tail [n_*:d_*] is NaN-padded (the real counts live in the n_* attributes).
+    - dlat/dlon are the per-observation positions (radar sorts on these; lat/lon
+      is the single radar-station position, constant per radar -> useless for sorting).
+    - veri_data is 2-D (d_veri, d_body) with d_veri == 1.
+    - all numeric fields are float (real files have no integer fields), and the
+      real data region of veri_data itself can contain NaN (missing reflectivity),
+      indistinguishable by value from the padding NaN -> padding is removed by the
+      n_body count, never by NaN.
+    """
+    n_hdr_size, d_hdr_size = 3, 5
+    n_body_size, d_body_size = 6, 8
+    d_veri_size = 1
+    nan = np.nan
+
+    # header fields (float; padded tail [n_hdr:d_hdr] = NaN). statid stays string.
+    lat = np.array([10.0, 10.0, 10.0, nan, nan])  # radar-station position
+    lon = np.array([20.0, 20.0, 20.0, nan, nan])
+    statid = ["r1", "r2", "r3", "", ""]
+    time_nomi = np.array([0.0, 0.0, 0.0, nan, nan])
+    codetype = np.array([22.0, 22.0, 22.0, nan, nan])
+    lbody = np.array([1.0, 2.0, 3.0, nan, nan])  # sum(real) = 6 = n_body
+    ibody = np.array([1.0, 2.0, 4.0, nan, nan])
+
+    # body fields (float; padded tail [n_body:d_body] = NaN). dlat/dlon are
+    # NaN-free in the real region (true in real files) so the sort key is unique.
+    dlat = np.array([10.1, 20.1, 20.2, 30.1, 30.2, 30.3, nan, nan])
+    dlon = np.array([100.1, 110.1, 110.2, 120.1, 120.2, 120.3, nan, nan])
+    varno = np.array([192, 192, 192, 192, 192, 192, nan, nan])
+    level = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, nan, nan])
+    # real-region veri_data deliberately holds a NaN (index 2) to mimic a
+    # missing reflectivity value coexisting with the padding NaNs.
+    veri_data = np.array([[10.0, 20.0, nan, 40.0, 50.0, 60.0, nan, nan]])  # (d_veri, d_body)
+    obs = np.array([11.0, nan, 29.8, 41.0, 52.0, 59.0, nan, nan])
+    state = np.array([1.0, 7, 1.0, 1.0, 1.0, 1.0, nan, nan])
+    flags = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, nan, nan])
+    check = np.array([32, 1, 32, 32, 32, 32, nan, nan])
+
+    data = xr.Dataset(
+        {
+            "lat": (("d_hdr",), lat),
+            "lon": (("d_hdr",), lon),
+            "statid": (("d_hdr",), statid),
+            "time_nomi": (("d_hdr",), time_nomi),
+            "codetype": (("d_hdr",), codetype),
+            "l_body": (("d_hdr",), lbody),
+            "i_body": (("d_hdr",), ibody),
+            "dlat": (("d_body",), dlat),
+            "dlon": (("d_body",), dlon),
+            "varno": (("d_body",), varno),
+            "level": (("d_body",), level),
+            "veri_data": (("d_veri", "d_body"), veri_data),
+            "obs": (("d_body",), obs),
+            "state": (("d_body",), state),
+            "flags": (("d_body",), flags),
+            "check": (("d_body",), check),
+        },
+        coords={
+            "d_hdr": np.arange(d_hdr_size),
+            "d_body": np.arange(d_body_size),
+            "d_veri": np.arange(d_veri_size),
+        },
+        attrs={"n_hdr": n_hdr_size, "n_body": n_body_size},
+    )
+
+    return data
+
+
 @pytest.fixture(scope="function")
 def fof_file_set(tmp_dir, sample_dataset_fof):
     """

diff --git a/tests/engine/test_fof_compare.py b/tests/engine/test_fof_compare.py
@@ -7,6 +7,7 @@
 import os
 from pathlib import Path
 
+import numpy as np
 import pytest
 from click.testing import CliRunner
 
@@ -133,3 +134,102 @@ def test_fof_compare_consistent(fof_datasets, tmp_dir, monkeypatch, caplog):
         )
 
     assert "Files are consistent!" in caplog.text
+
+
+@pytest.fixture(name="radar_fof_paths", scope="function")
+def fixture_radar_fof_paths(sample_dataset_radar_fof, tmp_dir):
+    """
+    Write padded radar fof files to disk and return paths containing the
+    {fof_type} placeholder (resolved to 'MLL'). Variants:
+    - ref/same: identical padded radar files.
+    - pert: one real-region veri_data value pushed far beyond any tolerance.
+    - bigpad: same n_body as ref but a larger d_body (more NaN padding rows) ->
+      exercises the n_body-vs-d_body fix (size gate + tolerance length).
+    """
+    ds = sample_dataset_radar_fof
+
+    same = ds.copy(deep=True)
+
+    pert = ds.copy(deep=True)
+    vd = pert["veri_data"].values.copy()
+    vd[0, 0] = vd[0, 0] + 100.0
+    pert["veri_data"] = (("d_veri", "d_body"), vd)
+
+    bigpad = ds.pad(d_body=(0, 4), constant_values=np.nan)
+    bigpad.attrs = dict(ds.attrs)  # keep the real n_hdr/n_body counts
+
+    # nanfill: a real-region veri_data NaN (an observation missing in ref) becomes
+    # a real value -> NaN-vs-real mismatch that must be flagged, not silently passed.
+    nanfill = ds.copy(deep=True)
+    vdn = nanfill["veri_data"].values.copy()
+    vdn[0, 2] = 3.0  # index 2 is NaN in the fixture's real region
+    nanfill["veri_data"] = (("d_veri", "d_body"), vdn)
+
+    pattern = os.path.join(tmp_dir, "fofRADAR{fof_type}_%s.nc")
+    paths = {}
+    for name, dataset in (
+        ("ref", ds),
+        ("same", same),
+        ("pert", pert),
+        ("bigpad", bigpad),
+        ("nanfill", nanfill),
+    ):
+        placeholder_path = pattern % name
+        dataset.to_netcdf(placeholder_path.format(fof_type="MLL"))
+        paths[name] = placeholder_path
+    return paths
+
+
+def _run_fof_compare(file1, file2, tolerance, tmp_dir, monkeypatch, caplog):
+    monkeypatch.chdir(tmp_dir)
+    runner = CliRunner()
+    with caplog.at_level(logging.INFO):
+        runner.invoke(
+            fof_compare,
+            [
+                "--file1", file1,
+                "--file2", file2,
+                "--fof-types", "MLL",
+                "--tolerance", str(tolerance),
+                "--rules", "",
+            ],
+        )
+
+
+def test_fof_compare_radar_consistent(radar_fof_paths, tmp_dir, monkeypatch, caplog):
+    """Two identical padded radar fof files compare as consistent."""
+    _run_fof_compare(
+        radar_fof_paths["ref"], radar_fof_paths["same"], 1.0, tmp_dir, monkeypatch, caplog
+    )
+    assert "Files are consistent!" in caplog.text
+
+
+def test_fof_compare_radar_not_consistent(radar_fof_paths, tmp_dir, monkeypatch, caplog):
+    """A single real-region veri_data value out of tolerance is detected."""
+    _run_fof_compare(
+        radar_fof_paths["ref"], radar_fof_paths["pert"], 1.0, tmp_dir, monkeypatch, caplog
+    )
+    assert "Files are NOT consistent!" in caplog.text
+
+
+def test_fof_compare_radar_different_padding(radar_fof_paths, tmp_dir, monkeypatch, caplog):
+    """
+    Two radar files with the same n_body but different d_body (padding) must still
+    compare -- the size gate and tolerance length use n_body, not d_body.
+    """
+    _run_fof_compare(
+        radar_fof_paths["ref"], radar_fof_paths["bigpad"], 1.0, tmp_dir, monkeypatch, caplog
+    )
+    assert "Files are consistent!" in caplog.text
+
+
+def test_fof_compare_radar_nan_vs_real(radar_fof_paths, tmp_dir, monkeypatch, caplog):
+    """
+    A veri_data cell that is NaN in one file but a real value in the other (an
+    observation appearing/disappearing) must be flagged, not silently passed --
+    even with a generous tolerance.
+    """
+    _run_fof_compare(
+        radar_fof_paths["ref"], radar_fof_paths["nanfill"], 1.0, tmp_dir, monkeypatch, caplog
+    )
+    assert "Files are NOT consistent!" in caplog.text
diff --git a/tests/util/test_fof_utils.py b/tests/util/test_fof_utils.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import pytest
+import xarray as xr
 
 from util.fof_utils import (  # write_lines,
     clean_value,
@@ -133,6 +134,75 @@ def test_split_report(ds1, ds_report, ds_obs):
     assert reports == ds_report and observations == ds_obs
 
 
+def test_split_radar_fof(sample_dataset_radar_fof):
+    """
+    Radar FOF files have d_hdr > n_hdr and d_body > n_body (NaN-padded tail), carry
+    dlat/dlon per-observation positions, and a 2-D veri_data (d_veri, d_body).
+    split_feedback_dataset must strip the padding by the n_* counts (NOT by NaN),
+    keep real-region NaNs, and sort observations by dlat/dlon.
+    """
+    ds = sample_dataset_radar_fof
+    n_hdr = ds.attrs["n_hdr"]
+    n_body = ds.attrs["n_body"]
+
+    reports, observations = split_feedback_dataset(ds)
+
+    # padding stripped to the real counts on both dimensions
+    assert reports.sizes["d_hdr"] == n_hdr
+    assert observations.sizes["d_body"] == n_body
+
+    # report padding rows are gone -> only the real station ids remain
+    assert list(reports["statid"].values.astype(str)) == ["r1", "r2", "r3"]
+
+    # 2-D veri_data (d_veri, d_body) survives the slice with the right body length
+    assert observations["veri_data"].sizes["d_body"] == n_body
+
+    # a real-region NaN in veri_data is preserved (only padding is removed, by count)
+    assert np.isnan(observations["veri_data"].values).any()
+
+    # observations are deterministically ordered by dlat (radar branch)
+    dlat_vals = observations["dlat"].values
+    assert list(dlat_vals) == sorted(dlat_vals), "observations not sorted by dlat"
+
+
+def test_split_radar_fof_scattered_padding_raises(sample_dataset_radar_fof):
+    """
+    The strip-by-count assumes real observations are front-packed (padding is a
+    NaN tail). A file with a real observation scattered into the padding region
+    must fail loudly rather than be silently mis-stripped. dlat is the discriminator.
+    """
+    ds = sample_dataset_radar_fof.copy(deep=True)
+    # swap a real dlat (index 5) with a padding NaN (index 7): now a non-NaN dlat
+    # sits beyond n_body and a NaN sits inside the real region.
+    dlat = ds["dlat"].values.copy()
+    dlat[5], dlat[7] = dlat[7], dlat[5]
+    ds["dlat"] = (("d_body",), dlat)
+    with pytest.raises(ValueError, match="not front-packed"):
+        split_feedback_dataset(ds)
+
+
+def test_split_feedback_sort_is_deterministic(sample_dataset_radar_fof, sample_dataset_fof):
+    """
+    The pre-comparison sort must yield the SAME canonical order regardless of the
+    order observations are stored in -- this is what establishes row correspondence
+    between the two files being compared. Reorder observations within a header block
+    (a valid permutation that preserves the i_body/l_body structure) and confirm the
+    split output is identical. Covers the radar (dlat/dlon) and non-radar (lat/lon)
+    sort branches.
+    """
+    cases = [
+        # reverse the 3 observations of the last header block (body indices 3,4,5)
+        (sample_dataset_radar_fof, [0, 1, 2, 5, 4, 3, 6, 7]),
+        # reverse the 2 observations of the last header block (body indices 4,5)
+        (sample_dataset_fof, [0, 1, 2, 3, 5, 4]),
+    ]
+    for ds, body_perm in cases:
+        rep1, obs1 = split_feedback_dataset(ds.copy(deep=True))
+        rep2, obs2 = split_feedback_dataset(ds.isel(d_body=body_perm))
+        xr.testing.assert_identical(obs1, obs2)
+        xr.testing.assert_identical(rep1, rep2)
+
+
 @pytest.fixture(name="arr1", scope="function")
 def fixture_arr1():
     return np.array([1.0, 5.0, 3.0, 4.0, 7.0], dtype=np.float32)

diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
@@ -388,6 +388,13 @@ def check_file_with_tolerances(
         diff_df = diff_df.groupby(["file_ID", "variable"]).max()
 
     if input_file_ref.file_type == FileType.FOF:
+        # A value present (non-NaN) in one file but missing (NaN) in the other is a
+        # real difference -- an observation appeared or disappeared. The relative
+        # diff is NaN there and check_variable would treat NaN as within tolerance,
+        # silently passing it; force those cells to fail. Both-NaN stays NaN (and
+        # passes), since both being missing means they are equal.
+        only_one_nan = df_ref.isna() ^ df_cur.isna()
+        diff_df = diff_df.mask(only_one_nan, np.inf)
         diff_df = diff_df.to_frame()
 
     out, err, tol = check_variable(diff_df, df_tol)

diff --git a/util/fof_utils.py b/util/fof_utils.py
@@ -16,7 +16,7 @@ def get_report_variables(ds):
     Get variable names of reports.
     """
     vars_shape_report = []
-    shape_report = ds.attrs["n_hdr"]
+    shape_report = ds["d_hdr"].size
 
     for var in ds.data_vars:
         if ds[var].shape[0] == shape_report:
@@ -30,7 +30,7 @@ def get_observation_variables(ds):
     Get variable names of observations.
     """
     vars_shape_observation = []
-    shape_observation = ds.attrs["n_body"]
+    shape_observation = ds["d_body"].size
 
     for var in ds.data_vars:
         if ds[var].shape[0] == shape_observation:
@@ -45,6 +45,36 @@ def split_feedback_dataset(ds):
     expand lat, lon, statid and time_nomi according to l_body
     and sort them to assure unique order.
     """
+    nhdr = ds.attrs["n_hdr"]
+    nbody = ds.attrs["n_body"]
+
+    # The slice below strips body padding by COUNT, which assumes real
+    # observations are packed contiguously at the front [0:n_body] with padding
+    # as a NaN tail. dlat/dlon are a reliable body padding discriminator (non-NaN
+    # for real obs, NaN for padding) -- unlike veri_data, which is NaN for
+    # real-missing too. Validate the assumption on dlat so a scattered file fails
+    # loudly instead of being silently mis-stripped. (Only radar files have dlat;
+    # non-radar files are not over-allocated.)
+    if "dlat" in ds.data_vars and ds["dlat"].dims == ("d_body",):
+        dlat_isnan = np.isnan(ds["dlat"].values)
+        if dlat_isnan[:nbody].any() or not dlat_isnan[nbody:].all():
+            raise ValueError(
+                "radar fof body is not front-packed: expected dlat to be non-NaN "
+                f"on [0:{nbody}] and NaN on [{nbody}:{ds.sizes['d_body']}]. The "
+                "strip-padding-by-count assumption does not hold for this file."
+            )
+
+    ds = ds.isel(d_hdr=slice(0, nhdr), d_body=slice(0, nbody))
+
+    # veri_data is 2-D (d_veri, d_body). Everything downstream assumes a single
+    # verification run; d_veri > 1 would make to_dataframe() emit n_body * d_veri
+    # rows and silently misalign the comparison. Fail loudly instead.
+    if ds.sizes.get("d_veri", 1) != 1:
+        raise ValueError(
+            f"fof file has d_veri = {ds.sizes['d_veri']} verification runs; "
+            "comparison supports exactly one. Select a single run before comparing."
+        )
+
     report_variables = get_report_variables(ds)
     ds_reports = ds[report_variables]
 
@@ -64,7 +94,10 @@ def split_feedback_dataset(ds):
     observation_variables.append("veri_data")
 
     ds_obs = ds[observation_variables]
-    sort_keys_obs = ["lat", "lon", "statid", "varno", "level", "time_nomi"]
+    if "dlat" in ds.data_vars:
+        sort_keys_obs = ["dlat", "dlon", "statid", "varno", "level", "time_nomi"]
+    else:
+        sort_keys_obs = ["lat", "lon", "statid", "varno", "level", "time_nomi"]
     ds_obs_sorted = ds_obs.sortby(sort_keys_obs)
 
     return ds_report_sorted, ds_obs_sorted