Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
5752e6b
duplicate timestamp in separate module
vergauwenthomas Jan 13, 2026
647454d
invalid check in separate module
vergauwenthomas Jan 13, 2026
593d8aa
regular checks now use the QCresult class per sensor
vergauwenthomas Jan 13, 2026
fff8212
Refactor the buddy check over multiple modules and catch logs using a…
vergauwenthomas Jan 13, 2026
c60bd5d
Merge branch 'dev' into detailed_outlier_flags
vergauwenthomas Jan 20, 2026
aa94f0c
minor version bump
vergauwenthomas Jan 20, 2026
7a830b4
fix qc_stats
vergauwenthomas Jan 20, 2026
a6ed7ee
labels defined in settings
vergauwenthomas Jan 20, 2026
6a9f5c1
bugfix when empty outliers
vergauwenthomas Jan 20, 2026
e512a3c
fix test
vergauwenthomas Jan 20, 2026
c7263c0
fix issue on get_qc_stats with empty outliersdf
vergauwenthomas Jan 21, 2026
b271f49
bugfix for unmet condition error
vergauwenthomas Jan 21, 2026
2d55b16
colorbugfix
vergauwenthomas Jan 21, 2026
3b0d081
bugfix when no whitelist is used
vergauwenthomas Jan 21, 2026
4208e30
drop unused method
vergauwenthomas Jan 21, 2026
0975bed
drop unused variable
vergauwenthomas Jan 21, 2026
b0a0215
add get_qc in test, for sanity checking
vergauwenthomas Jan 21, 2026
bec3082
add test with buddy and mf=true
vergauwenthomas Jan 26, 2026
99d5be8
fix the processing of safetynet details + creation of a final detail…
vergauwenthomas Jan 26, 2026
2324db4
rename min_sample_spread and use the z_robust bool arg + robust z score
vergauwenthomas Jan 26, 2026
0ab00ac
implement the MP version of buddy check
vergauwenthomas Jan 27, 2026
e69c1c4
fix the number of cpus by settings for MP qc checks
vergauwenthomas Jan 27, 2026
aac7c2e
min_std deprecation warning
vergauwenthomas Jan 27, 2026
c86471e
update some tests
vergauwenthomas Jan 28, 2026
2a297b1
fix and add a buddy test
vergauwenthomas Jan 28, 2026
072fc8c
buddy check test solutions
vergauwenthomas Jan 28, 2026
71b5328
without storing values in qcresult
vergauwenthomas Jan 28, 2026
7fd2e1e
rename update
vergauwenthomas Jan 28, 2026
a11bc92
qc_overview_df for sensordata
vergauwenthomas Jan 28, 2026
a2a946b
qc_overview_df for station and dataset level
vergauwenthomas Jan 28, 2026
610dd9d
choose to use basic or detailed labels for outliersdf
vergauwenthomas Jan 28, 2026
cf96677
bugfix when safetynet is triggered in previous iteration, but not in cu…
vergauwenthomas Jan 28, 2026
17be95b
put the qc detail df constructors in a separate module
vergauwenthomas Jan 29, 2026
f5c19fa
qc overview construction in separate module
vergauwenthomas Jan 29, 2026
06f5d29
from buddywrapstation to buddywrapsensor
vergauwenthomas Jan 29, 2026
651688f
drop the MP executor locals from the qcresult so make it serializable…
vergauwenthomas Jan 29, 2026
5bca2ee
replace index for details by using the flags index
vergauwenthomas Jan 30, 2026
bda96af
add some logs
vergauwenthomas Jan 30, 2026
b8771bf
minor version bump
vergauwenthomas Jan 30, 2026
07e6f7c
log full wrapstation on error (debugging)
vergauwenthomas Feb 2, 2026
c525201
Potential bug fixes
vergauwenthomas Feb 2, 2026
01adb96
improve memory and load distribution for multiprocessing
vergauwenthomas Feb 2, 2026
1396f52
minor version bump
vergauwenthomas Feb 2, 2026
fb756c7
add a todo
vergauwenthomas Feb 3, 2026
5385cd4
drop debug lines
vergauwenthomas Feb 3, 2026
a870ffd
bugfix, return checksettings
vergauwenthomas Feb 3, 2026
456a248
minor version bump
vergauwenthomas Feb 3, 2026
6e3b8d4
hardcode the checksettings, there was a memory leak and unwanted data…
vergauwenthomas Feb 4, 2026
17c8a36
fix memory leak issue and too much info in qcchecksettings with locals()
vergauwenthomas Feb 4, 2026
fc828a2
remove complex joining functions since parallelization is per station
vergauwenthomas Feb 5, 2026
851452b
debug version bump
vergauwenthomas Feb 5, 2026
0d26d6c
add the max N buddies functionality + minor version bump
vergauwenthomas Feb 6, 2026
bea4623
details bugfix
vergauwenthomas Feb 9, 2026
c6b1e73
DEBUG, return wrapsensors for analysis
vergauwenthomas Feb 9, 2026
c6a6db5
debug version bump
vergauwenthomas Feb 9, 2026
a708338
bump because of invalid version format
vergauwenthomas Feb 9, 2026
517ed84
add only_if_previous_had_no_buddies functionality for buddy safetynets
vergauwenthomas Feb 10, 2026
55596de
implement the minimum buddy radius functionality
vergauwenthomas Feb 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = ["poetry-core"]

[project]
name = "MetObs-toolkit"
version = "1.0.0a1"
version = "1.0.0a13"
license = "LICENSE"
authors = [{name = "Thomas Vergauwen", email = "thomas.vergauwen@ugent.be"}]
description = "A Meteorological observations toolkit for scientists"
Expand Down
390 changes: 233 additions & 157 deletions src/metobs_toolkit/dataset.py

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/metobs_toolkit/gf_collection/overview_df_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
(sensordata, station, dataset) for overviews and summaries of Gaps."""

import pandas as pd
from typing import Union
from metobs_toolkit.backend_collection.dev_collection import copy_doc
from metobs_toolkit.backend_collection.df_helpers import save_concat

#===============================
# Gap overview
#===============================

def sensordata_gap_status_overview_df(sensordata) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -80,7 +84,6 @@ def sensordata_gap_status_overview_df(sensordata) -> pd.DataFrame:
index=pd.Index([], name="gapstart"),
)


@copy_doc(sensordata_gap_status_overview_df)
def station_gap_status_overview_df(station) -> pd.DataFrame:
concatlist = []
Expand Down Expand Up @@ -127,3 +130,4 @@ def dataset_gap_status_overview_df(dataset) -> pd.DataFrame:
),
)
return combdf

253 changes: 183 additions & 70 deletions src/metobs_toolkit/plot_collection/qc_info_pies.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,40 @@

logger = logging.getLogger("<metobs_toolkit>")

def autopct_format(pct: float) -> str:
    """
    Format a pie-slice percentage for use as a matplotlib ``autopct`` callable.

    Parameters
    ----------
    pct : float
        Percentage value of the slice.

    Returns
    -------
    str
        The percentage with one decimal followed by ``%`` when ``pct`` is
        strictly positive, otherwise an empty string so that no text is
        produced for that slice.
    """
    # Non-positive values (and NaN, for which the comparison is False) get
    # an empty label instead of a "0.0%" text.
    if pct > 0:
        return f"{pct:.1f}%"
    return ""


@log_entry
def qc_overview_pies(
df: pd.DataFrame,
end_labels_from_df: pd.Series,
end_labels_from_outliers: pd.Series,
per_check_labels: pd.Series,
fig_title: str = ""

) -> plt.Figure:
"""
Generate a quality control (QC) overview using pie charts.
Generate a QC overview figure with pie charts for label frequencies and per-check outcomes.

Parameters
----------
df : pandas.DataFrame
DataFrame containing QC data. Must include columns 'N_labeled', 'N_all', and 'N_checked'.
end_labels_from_df : pandas.Series
Counts of final labels for all records (index as labels), typically from
``SensorData.df['label'].value_counts()``.
end_labels_from_outliers : pandas.Series
Counts limited to outlier records (index as labels), e.g. from
``SensorData.outliersdf['label'].value_counts()``. If empty, a single slice
"No QC outliers" is drawn.
per_check_labels : pandas.Series
MultiIndex Series with index levels ``['checkname', 'flag']`` containing counts per
QC check outcome (flags such as ``flagged_cond``, ``pass_cond``, ``unmet_cond``, etc.),
as returned by ``SensorData.get_qc_freq_statistics()``.

Returns
-------
matplotlib.figure.Figure
The generated figure containing the QC overview pie charts.

Raises
------
TypeError
If any of the arguments are not of the expected type.
Figure containing two large pies (all labels and outlier labels) and one small pie per
QC check showing the distribution of its outcomes.
"""

# Define layout
Expand All @@ -48,97 +60,198 @@ def qc_overview_pies(
ax_thr = fig.add_subplot(spec[0, 2:]) # top half right

# Frequency with all
plotdf = df
colors = [Settings._get_color_from_label(label) for label in plotdf.index]
plotdf.plot(
colors = [Settings._get_color_from_label(label) for label in end_labels_from_df.index]
end_labels_from_df.plot(
ax=ax_thl,
kind="pie",
y="N_labeled",
autopct="%1.1f%%",
y="",
autopct=autopct_format,
legend=False,
colors=colors,
radius=Settings.get("plotting_settings.pie_charts.radius_big"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_size_big_pies"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_label_size_big_pies"),
)
ax_thl.set_title("Label frequencies")
ax_thl.set_title("Label frequencies", **Settings.get("plotting_settings.pie_charts.big_pie_title_kwargs"))
ax_thl.set_ylabel("")

# Outliers comparison
plotdf = df[
~df.index.isin(
[
Settings.get("label_def.goodrecord.label"),
Settings.get("label_def.regular_gap.label"),
]
)
]

colors = [Settings._get_color_from_label(label) for label in plotdf.index]
# Only outliers

colors = [Settings._get_color_from_label(label) for label in end_labels_from_outliers.index]

if plotdf.empty:
if end_labels_from_outliers.empty:
# No outliers --> full pie with "No QC outliers" in the color of 'ok'
plotdf = pd.DataFrame(
data={"N_labeled": [100]}, index=pd.Index(data=["No QC outliers"])
)
colors = [Settings.get("label_def.goodrecord.color")]
end_labels_from_outliers = pd.Series([100], index=["No QC outliers"])
colors = [Settings._get_color_from_label('ok')]

plotdf.plot(
end_labels_from_outliers.plot(
ax=ax_thr,
kind="pie",
y="N_labeled",
autopct="%1.1f%%",
y="",
autopct=autopct_format,
legend=False,
colors=colors,
radius=Settings.get("plotting_settings.pie_charts.radius_big"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_size_big_pies"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_label_size_big_pies"),
)
ax_thr.set_title("Outlier specific frequencies")
ax_thr.set_title("Outlier specific frequencies", **Settings.get("plotting_settings.pie_charts.big_pie_title_kwargs"))
ax_thr.set_ylabel("")

# Performance per check
plotdf = df[
~df.index.isin(
[
Settings.get("label_def.goodrecord.label"),
Settings.get("label_def.regular_gap.label"),
]
)
]

# Label to QC check name map
label_too_qcname_map = Settings._label_to_qccheckmap()
per_qc_colmap = {val['label']: val['plotkwargs']['color'] for val in Settings.get('qc_status_labels_per_check').values()}


i = 0
for idx, row in plotdf.iterrows():
# Target a specific axes
for checkname in per_check_labels.index.get_level_values('checkname').unique():
subax = fig.add_subplot(spec[math.floor(i / ncol) + 1, i % ncol])

# Construct a plot Series
plotseries = pd.Series(
{
Settings.get("label_def.uncheckedrecord.label"): row["N_all"]
- row["N_checked"],
Settings.get("label_def.goodrecord.label"): row["N_checked"]
- row["N_labeled"],
Settings.get("label_def.outlier.label"): row["N_labeled"],
}
)
# Define colors
colors = [Settings._get_color_from_label(label) for label in plotseries.index]
plotseries.plot(

checkname_subset = per_check_labels.loc[checkname]
colors = [per_qc_colmap.get(label, 'gray') for label in checkname_subset.index]

checkname_subset.plot(
ax=subax,
kind="pie",
autopct="%1.1f%%",
autopct=autopct_format,
legend=False,
colors=colors,
radius=Settings.get("plotting_settings.pie_charts.radius_small"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_size_small_pies"),
fontsize=Settings.get("plotting_settings.pie_charts.txt_label_size_small_pies"),
)

subax.set_title(f"Effectiveness of {label_too_qcname_map[idx]}")
subax.set_title(f"{checkname}", **Settings.get("plotting_settings.pie_charts.small_pie_title_kwargs"))
subax.set_ylabel("")

i += 1

logger.debug("Exiting qc_overview_pies function.")


fig.suptitle(fig_title, **Settings.get("plotting_settings.pie_charts.fig_title_kwargs"))
return fig


# @log_entry
# def qc_overview_pies(
# df: pd.DataFrame,
# ) -> plt.Figure:
# """
# Generate a quality control (QC) overview using pie charts.

# Parameters
# ----------
# df : pandas.DataFrame
# DataFrame containing QC data. Must include columns 'N_labeled', 'N_all', and 'N_checked'.

# Returns
# -------
# matplotlib.figure.Figure
# The generated figure containing the QC overview pie charts.

# Raises
# ------
# TypeError
# If any of the arguments are not of the expected type.
# """

# # Define layout
# ax = create_axes(**Settings.get("plotting_settings.pie_charts.figkwargs"))
# ax.set_axis_off()
# fig = ax.get_figure()

# ncol = Settings.get("plotting_settings.pie_charts.ncols")
# spec = fig.add_gridspec(4, ncol)
# ax_thl = fig.add_subplot(spec[0, :2]) # top half left
# ax_thr = fig.add_subplot(spec[0, 2:]) # top half right

# # Frequency with all
# plotdf = df
# colors = [Settings._get_color_from_label(label) for label in plotdf.index]
# plotdf.plot(
# ax=ax_thl,
# kind="pie",
# y="N_labeled",
# autopct="%1.1f%%",
# legend=False,
# colors=colors,
# radius=Settings.get("plotting_settings.pie_charts.radius_big"),
# fontsize=Settings.get("plotting_settings.pie_charts.txt_size_big_pies"),
# )
# ax_thl.set_title("Label frequencies")
# ax_thl.set_ylabel("")

# # Outliers comparison
# plotdf = df[
# ~df.index.isin(
# [
# Settings.get("label_def.goodrecord.label"),
# Settings.get("label_def.regular_gap.label"),
# ]
# )
# ]

# colors = [Settings._get_color_from_label(label) for label in plotdf.index]

# if plotdf.empty:
# # No outliers --> full pie with "No QC outliers" in the color of 'ok'
# plotdf = pd.DataFrame(
# data={"N_labeled": [100]}, index=pd.Index(data=["No QC outliers"])
# )
# colors = [Settings.get("label_def.goodrecord.color")]

# plotdf.plot(
# ax=ax_thr,
# kind="pie",
# y="N_labeled",
# autopct="%1.1f%%",
# legend=False,
# colors=colors,
# radius=Settings.get("plotting_settings.pie_charts.radius_big"),
# fontsize=Settings.get("plotting_settings.pie_charts.txt_size_big_pies"),
# )
# ax_thr.set_title("Outlier specific frequencies")
# ax_thr.set_ylabel("")

# # Performance per check
# plotdf = df[
# ~df.index.isin(
# [
# Settings.get("label_def.goodrecord.label"),
# Settings.get("label_def.regular_gap.label"),
# ]
# )
# ]

# # Label to QC check name map
# label_too_qcname_map = Settings._label_to_qccheckmap()

# i = 0
# for idx, row in plotdf.iterrows():
# # Target a specific axes
# subax = fig.add_subplot(spec[math.floor(i / ncol) + 1, i % ncol])

# # Construct a plot Series
# plotseries = pd.Series(
# {
# Settings.get("label_def.uncheckedrecord.label"): row["N_all"]
# - row["N_checked"],
# Settings.get("label_def.goodrecord.label"): row["N_checked"]
# - row["N_labeled"],
# Settings.get("label_def.outlier.label"): row["N_labeled"],
# }
# )
# # Define colors
# colors = [Settings._get_color_from_label(label) for label in plotseries.index]
# plotseries.plot(
# ax=subax,
# kind="pie",
# autopct="%1.1f%%",
# legend=False,
# colors=colors,
# radius=Settings.get("plotting_settings.pie_charts.radius_small"),
# fontsize=Settings.get("plotting_settings.pie_charts.txt_size_small_pies"),
# )

# subax.set_title(f"Effectiveness of {label_too_qcname_map[idx]}")
# subax.set_ylabel("")

# i += 1

# logger.debug("Exiting qc_overview_pies function.")
# return fig
4 changes: 3 additions & 1 deletion src/metobs_toolkit/qc_collection/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# flake8: noqa: F401

from .duplicated_timestamp import duplicated_timestamp_check
from .invalid_check import drop_invalid_values
from .grossvalue_check import gross_value_check
from .persistence_check import persistence_check
from .repetitions_check import repetitions_check
from .step_check import step_check
from .window_variation_check import window_variation_check
from .buddy_check import toolkit_buddy_check
from .spatial_checks.buddy_check import toolkit_buddy_check
Loading