From 1152a30ad7c522ced9be64460f7f8cc37761f98d Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Tue, 2 Dec 2025 19:52:35 +0100
Subject: [PATCH 1/7] Added Precision-REcall curve in probscores, similar to
 ROC curve but better for imbalanced distribution

---
 pysteps/verification/probscores.py | 149 +++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index f1eb398ba..96425a684 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -20,6 +20,10 @@
     ROC_curve_init
     ROC_curve_accum
     ROC_curve_compute
+    PR_curve
+    PR_curve_init
+    PR_curve_accum
+    PR_curve_compute
 """
 
 import numpy as np
@@ -421,3 +425,148 @@ def ROC_curve_compute(ROC, compute_area=False):
         return POFD_vals, POD_vals, area
     else:
         return POFD_vals, POD_vals
+
+
+def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False, compute_prevalence=False):
+    """
+    Compute the Precision-Recall curve, its area, and optionally the prevalence baseline.
+
+    Parameters
+    ----------
+    P_f: array_like
+      Forecasted probabilities for exceeding the threshold specified in the PR object.
+      Non-finite values are ignored.
+    X_o: array_like
+      Observed values. Non-finite values are ignored.
+    X_min: float
+      Precipitation intensity threshold for yes/no prediction.
+    n_prob_thrs: int
+      The number of probability thresholds to use.
+      The interval [0,1] is divided into n_prob_thrs evenly spaced values.
+    compute_area: bool, optional
+      If True, compute the area under the Precision-Recall curve.
+    compute_prevalence: bool, optional
+      If True, compute and return the prevalence baseline (fraction of positives).
+
+    Returns
+    -------
+    out: tuple
+      A two-element tuple containing precision and recall for the probability thresholds.
+      If compute_area is True, return the area under the PR curve as an extra element.
+      If compute_prevalence is True, return the prevalence baseline as an extra element.
+    """
+    P_f = P_f.copy()
+    X_o = X_o.copy()
+    pr = PR_curve_init(X_min, n_prob_thrs)
+    PR_curve_accum(pr, P_f, X_o)
+    return PR_curve_compute(pr, X_o, X_min, compute_area, compute_prevalence)
+
+
+def PR_curve_init(X_min, n_prob_thrs=10):
+    """
+    Initialize a Precision-Recall curve object.
+
+    Parameters
+    ----------
+    X_min: float
+      Precipitation intensity threshold for yes/no prediction.
+    n_prob_thrs: int
+      The number of probability thresholds to use.
+      The interval [0,1] is divided into n_prob_thrs evenly spaced values.
+
+    Returns
+    -------
+    out: dict
+      The PR curve object containing counters for hits, misses, false alarms,
+      correct negatives, and the probability thresholds.
+    """
+    PR = {}
+    PR["X_min"] = X_min
+    PR["hits"] = np.zeros(n_prob_thrs, dtype=int)
+    PR["misses"] = np.zeros(n_prob_thrs, dtype=int)
+    PR["false_alarms"] = np.zeros(n_prob_thrs, dtype=int)
+    PR["corr_neg"] = np.zeros(n_prob_thrs, dtype=int)
+    PR["prob_thrs"] = np.linspace(0.0, 1.0, int(n_prob_thrs))
+    return PR
+
+
+def PR_curve_accum(PR, P_f, X_o):
+    """
+    Accumulate the given probability-observation pairs into the given PR object.
+
+    Parameters
+    ----------
+    PR: dict
+      A PR curve object created with PR_curve_init.
+    P_f: array_like
+      Forecasted probabilities for exceeding the threshold specified in the PR object.
+      Non-finite values are ignored.
+    X_o: array_like
+      Observed values. Non-finite values are ignored.
+
+    Notes
+    -----
+    Updates the counters (hits, misses, false alarms, correct negatives) for each
+    probability threshold.
+    """
+    mask = np.logical_and(np.isfinite(P_f), np.isfinite(X_o))
+    P_f = P_f[mask]
+    X_o = X_o[mask]
+    for i, p in enumerate(PR["prob_thrs"]):
+        forecast_yes = P_f >= p
+        obs_yes = X_o >= PR["X_min"]
+        PR["hits"][i]+=np.sum(np.logical_and(forecast_yes, obs_yes))
+        PR["misses"][i]+=np.sum(np.logical_and(~forecast_yes, obs_yes))
+        PR["false_alarms"][i]+=np.sum(np.logical_and(forecast_yes, ~obs_yes))
+        PR["corr_neg"][i]+=np.sum(np.logical_and(~forecast_yes, ~obs_yes))
+
+
+def PR_curve_compute(PR, X_o, X_min, compute_area=False, compute_prevalence=False):
+    """
+    Compute the Precision-Recall curve, its area, and optionally prevalence baseline.
+
+    Parameters
+    ----------
+    PR: dict
+      A PR curve object created with PR_curve_init.
+    X_o: array_like
+      Observed values used to compute prevalence.
+    X_min: float
+      Precipitation intensity threshold for yes/no prediction.
+    compute_area: bool, optional
+      If True, compute the area under the Precision-Recall curve.
+    compute_prevalence: bool, optional
+      If True, compute and return the prevalence baseline (fraction of positives).
+
+    Returns
+    -------
+    out: tuple
+      Precision values and recall values for the probability thresholds.
+      If compute_area is True, return the area under the PR curve as an extra element.
+      If compute_prevalence is True, return the prevalence baseline as an extra element.
+    """
+    precision_vals = []
+    recall_vals = []
+    for i in range(len(PR["prob_thrs"])):
+        hits = PR["hits"][i]
+        misses = PR["misses"][i]
+        false_alarms = PR["false_alarms"][i]
+        recall = hits/(hits+misses) if (hits+misses)>0 else 0.0
+        precision = hits/(hits+false_alarms) if (hits+false_alarms)>0 else 1.0
+        recall_vals.append(recall)
+        precision_vals.append(precision)
+
+    results = (precision_vals, recall_vals)
+
+    if compute_area:
+        area = 0.0
+        for i in range(len(PR["prob_thrs"])-1):
+            area += (recall_vals[i+1]-recall_vals[i])*(precision_vals[i+1]+precision_vals[i])/2.0
+        results = results + (area,)
+
+    if compute_prevalence:
+        prevalence = np.sum(X_o >= X_min) / len(X_o)
+        results = results + (prevalence,)
+
+    return results
+

From 140f04b1cd3c4751ca824ae3b9331c18cfa5112d Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Mon, 8 Dec 2025 16:42:14 +0100
Subject: [PATCH 2/7] Corrected area computation, needed sorting first

---
 pysteps/verification/probscores.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index 96425a684..37a1a54c2 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -547,26 +547,29 @@ def PR_curve_compute(PR, X_o, X_min, compute_area=False, compute_prevalence=Fals
     """
     precision_vals = []
     recall_vals = []
+
     for i in range(len(PR["prob_thrs"])):
         hits = PR["hits"][i]
         misses = PR["misses"][i]
         false_alarms = PR["false_alarms"][i]
-        recall = hits/(hits+misses) if (hits+misses)>0 else 0.0
-        precision = hits/(hits+false_alarms) if (hits+false_alarms)>0 else 1.0
+
+        recall = hits / (hits + misses) if (hits + misses) > 0 else 0.0
+        precision = hits / (hits + false_alarms) if (hits + false_alarms) > 0 else 1.0
+
         recall_vals.append(recall)
         precision_vals.append(precision)
 
+    # Add endpoints: (0,1) and (1, prevalence)
+    prevalence = np.sum(X_o >= X_min) / len(X_o)
+    recall_vals = [0.0] + recall_vals + [1.0]
+    precision_vals = [1.0] + precision_vals + [prevalence]
+
     results = (precision_vals, recall_vals)
 
     if compute_area:
-        area = 0.0
-        for i in range(len(PR["prob_thrs"])-1):
-            area += (recall_vals[i+1]-recall_vals[i])*(precision_vals[i+1]+precision_vals[i])/2.0
+        # Sort by recall before integration
+        recall_sorted, precision_sorted = zip(*sorted(zip(recall_vals, precision_vals)))
+        area = np.trapz(precision_sorted, recall_sorted)
         results = results + (area,)
 
-    if compute_prevalence:
-        prevalence = np.sum(X_o >= X_min) / len(X_o)
-        results = results + (prevalence,)
-
-    return results
-
+    return results
\ No newline at end of file

From 22e1a741a6799d1ef97b6814945d4e4e99fd7a1c Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Mon, 8 Dec 2025 16:44:24 +0100
Subject: [PATCH 3/7] Corrected description

---
 pysteps/verification/probscores.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index 37a1a54c2..6b5f0a741 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -521,7 +521,7 @@ def PR_curve_accum(PR, P_f, X_o):
         PR["corr_neg"][i]+=np.sum(np.logical_and(~forecast_yes, ~obs_yes))
 
 
-def PR_curve_compute(PR, X_o, X_min, compute_area=False, compute_prevalence=False):
+def PR_curve_compute(PR, X_o, X_min, compute_area=False):
     """
     Compute the Precision-Recall curve, its area, and optionally prevalence baseline.
 
@@ -535,15 +535,12 @@ def PR_curve_compute(PR, X_o, X_min, compute_area=False, compute_prevalence=Fals
       Precipitation intensity threshold for yes/no prediction.
     compute_area: bool, optional
       If True, compute the area under the Precision-Recall curve.
-    compute_prevalence: bool, optional
-      If True, compute and return the prevalence baseline (fraction of positives).
-
+    
     Returns
     -------
     out: tuple
       Precision values and recall values for the probability thresholds.
       If compute_area is True, return the area under the PR curve as an extra element.
-      If compute_prevalence is True, return the prevalence baseline as an extra element.
     """
     precision_vals = []
     recall_vals = []

From 47542cdafd6d23877855d95bd8ea9a1789744e6e Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Mon, 8 Dec 2025 16:45:17 +0100
Subject: [PATCH 4/7] Corrected description

---
 pysteps/verification/probscores.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index 6b5f0a741..0cb2c9ba4 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -427,7 +427,7 @@ def ROC_curve_compute(ROC, compute_area=False):
         return POFD_vals, POD_vals
 
 
-def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False, compute_prevalence=False):
+def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False):
     """
     Compute the Precision-Recall curve, its area, and optionally the prevalence baseline.
 
@@ -445,15 +445,12 @@ def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False, compute_preval
       The interval [0,1] is divided into n_prob_thrs evenly spaced values.
     compute_area: bool, optional
       If True, compute the area under the Precision-Recall curve.
-    compute_prevalence: bool, optional
-      If True, compute and return the prevalence baseline (fraction of positives).
-
+    
     Returns
     -------
     out: tuple
       A two-element tuple containing precision and recall for the probability thresholds.
       If compute_area is True, return the area under the PR curve as an extra element.
-      If compute_prevalence is True, return the prevalence baseline as an extra element.
     """
     P_f = P_f.copy()
     X_o = X_o.copy()

From bb30bad57bf9e243d6c5d372980ec9ded4b01a4e Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Mon, 8 Dec 2025 16:46:26 +0100
Subject: [PATCH 5/7] Removed prevalence option

---
 pysteps/verification/probscores.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index 0cb2c9ba4..d2e9e1a36 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -456,7 +456,7 @@ def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False):
     X_o = X_o.copy()
     pr = PR_curve_init(X_min, n_prob_thrs)
     PR_curve_accum(pr, P_f, X_o)
-    return PR_curve_compute(pr, X_o, X_min, compute_area, compute_prevalence)
+    return PR_curve_compute(pr, X_o, X_min, compute_area)
 
 
 def PR_curve_init(X_min, n_prob_thrs=10):

From 19640651671f4e5d59a3d50d63ef695bb2d8fc3f Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Tue, 9 Dec 2025 12:59:41 +0100
Subject: [PATCH 6/7] Removed prevalence completely

---
 pysteps/verification/probscores.py | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index d2e9e1a36..2ceceb1b6 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -512,10 +512,10 @@ def PR_curve_accum(PR, P_f, X_o):
     for i, p in enumerate(PR["prob_thrs"]):
         forecast_yes = P_f >= p
         obs_yes = X_o >= PR["X_min"]
-        PR["hits"][i]+=np.sum(np.logical_and(forecast_yes, obs_yes))
-        PR["misses"][i]+=np.sum(np.logical_and(~forecast_yes, obs_yes))
-        PR["false_alarms"][i]+=np.sum(np.logical_and(forecast_yes, ~obs_yes))
-        PR["corr_neg"][i]+=np.sum(np.logical_and(~forecast_yes, ~obs_yes))
+        PR["hits"][i] += np.sum(np.logical_and(forecast_yes, obs_yes))
+        PR["misses"][i] += np.sum(np.logical_and(~forecast_yes, obs_yes))
+        PR["false_alarms"][i] += np.sum(np.logical_and(forecast_yes, ~obs_yes))
+        PR["corr_neg"][i] += np.sum(np.logical_and(~forecast_yes, ~obs_yes))
 
 
 def PR_curve_compute(PR, X_o, X_min, compute_area=False):
@@ -553,17 +553,10 @@ def PR_curve_compute(PR, X_o, X_min, compute_area=False):
         recall_vals.append(recall)
         precision_vals.append(precision)
 
-    # Add endpoints: (0,1) and (1, prevalence)
-    prevalence = np.sum(X_o >= X_min) / len(X_o)
-    recall_vals = [0.0] + recall_vals + [1.0]
-    precision_vals = [1.0] + precision_vals + [prevalence]
-
-    results = (precision_vals, recall_vals)
-
     if compute_area:
         # Sort by recall before integration
         recall_sorted, precision_sorted = zip(*sorted(zip(recall_vals, precision_vals)))
         area = np.trapz(precision_sorted, recall_sorted)
-        results = results + (area,)
-
-    return results
\ No newline at end of file
+        return precision_vals, recall_vals, area
+    else:
+        return precision_vals, recall_vals
\ No newline at end of file

From a4a44db3db1fe682fcece9004d19c3228a0637e5 Mon Sep 17 00:00:00 2001
From: Daniel Eduardo Villarreal Jaime <u0168535@kuleuven.be>
Date: Tue, 9 Dec 2025 13:02:25 +0100
Subject: [PATCH 7/7] Updated description

---
 pysteps/verification/probscores.py | 102 +++++++++++++++--------------
 1 file changed, 53 insertions(+), 49 deletions(-)

diff --git a/pysteps/verification/probscores.py b/pysteps/verification/probscores.py
index 2ceceb1b6..8108b8673 100644
--- a/pysteps/verification/probscores.py
+++ b/pysteps/verification/probscores.py
@@ -429,28 +429,29 @@ def ROC_curve_compute(ROC, compute_area=False):
 
 def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False):
     """
-    Compute the Precision-Recall curve, its area, and optionally the prevalence baseline.
+    Compute the Precision–Recall (PR) curve and optionally its area.
 
     Parameters
     ----------
-    P_f: array_like
-      Forecasted probabilities for exceeding the threshold specified in the PR object.
-      Non-finite values are ignored.
-    X_o: array_like
-      Observed values. Non-finite values are ignored.
-    X_min: float
-      Precipitation intensity threshold for yes/no prediction.
-    n_prob_thrs: int
-      The number of probability thresholds to use.
-      The interval [0,1] is divided into n_prob_thrs evenly spaced values.
-    compute_area: bool, optional
-      If True, compute the area under the Precision-Recall curve.
-    
+    P_f : array_like
+        Forecasted probabilities for exceeding the threshold X_min.
+        Non-finite values are ignored.
+    X_o : array_like
+        Observed values. Non-finite values are ignored.
+    X_min : float
+        Precipitation intensity threshold for yes/no prediction.
+    n_prob_thrs : int, optional
+        Number of probability thresholds to evaluate.
+        The interval [0, 1] is divided into n_prob_thrs evenly spaced values.
+    compute_area : bool, optional
+        If True, compute the area under the PR curve using trapezoidal integration.
+
     Returns
     -------
-    out: tuple
-      A two-element tuple containing precision and recall for the probability thresholds.
-      If compute_area is True, return the area under the PR curve as an extra element.
+    out : tuple
+        (precision_vals, recall_vals) for each probability threshold.
+        If compute_area is True, return (precision_vals, recall_vals, area),
+        where area is the trapezoidal estimate of the PR curve area.
     """
     P_f = P_f.copy()
     X_o = X_o.copy()
@@ -461,21 +462,24 @@ def PR_curve(P_f, X_o, X_min, n_prob_thrs=10, compute_area=False):
 
 def PR_curve_init(X_min, n_prob_thrs=10):
     """
-    Initialize a Precision-Recall curve object.
+    Initialize a Precision–Recall curve object.
 
     Parameters
     ----------
-    X_min: float
-      Precipitation intensity threshold for yes/no prediction.
-    n_prob_thrs: int
-      The number of probability thresholds to use.
-      The interval [0,1] is divided into n_prob_thrs evenly spaced values.
+    X_min : float
+        Precipitation intensity threshold for yes/no prediction.
+    n_prob_thrs : int, optional
+        Number of probability thresholds to evaluate.
 
     Returns
     -------
-    out: dict
-      The PR curve object containing counters for hits, misses, false alarms,
-      correct negatives, and the probability thresholds.
+    PR : dict
+        Dictionary containing counters for hits, misses, false alarms,
+        correct negatives, and the probability thresholds.
+        Keys:
+            - "X_min" : threshold value
+            - "hits", "misses", "false_alarms", "corr_neg" : arrays of counts
+            - "prob_thrs" : array of evenly spaced thresholds in [0, 1]
     """
     PR = {}
     PR["X_min"] = X_min
@@ -489,22 +493,21 @@ def PR_curve_init(X_min, n_prob_thrs=10):
 
 def PR_curve_accum(PR, P_f, X_o):
     """
-    Accumulate the given probability-observation pairs into the given PR object.
+    Accumulate forecast–observation pairs into the PR object.
 
     Parameters
     ----------
-    PR: dict
-      A PR curve object created with PR_curve_init.
-    P_f: array_like
-      Forecasted probabilities for exceeding the threshold specified in the PR object.
-      Non-finite values are ignored.
-    X_o: array_like
-      Observed values. Non-finite values are ignored.
+    PR : dict
+        A PR curve object created with PR_curve_init.
+    P_f : array_like
+        Forecasted probabilities for exceeding X_min.
+    X_o : array_like
+        Observed values.
 
     Notes
     -----
-    Updates the counters (hits, misses, false alarms, correct negatives) for each
-    probability threshold.
+    Updates the counters (hits, misses, false alarms, correct negatives)
+    for each probability threshold in PR["prob_thrs"].
     """
     mask = np.logical_and(np.isfinite(P_f), np.isfinite(X_o))
     P_f = P_f[mask]
@@ -520,24 +523,25 @@ def PR_curve_accum(PR, P_f, X_o):
 
 def PR_curve_compute(PR, X_o, X_min, compute_area=False):
     """
-    Compute the Precision-Recall curve, its area, and optionally prevalence baseline.
+    Compute precision and recall values from the PR object.
 
     Parameters
     ----------
-    PR: dict
-      A PR curve object created with PR_curve_init.
-    X_o: array_like
-      Observed values used to compute prevalence.
-    X_min: float
-      Precipitation intensity threshold for yes/no prediction.
-    compute_area: bool, optional
-      If True, compute the area under the Precision-Recall curve.
-    
+    PR : dict
+        A PR curve object created with PR_curve_init.
+    X_o : array_like
+        Observed values (used only if prevalence or area is computed).
+    X_min : float
+        Precipitation intensity threshold for yes/no prediction.
+    compute_area : bool, optional
+        If True, compute the area under the PR curve.
+
     Returns
     -------
-    out: tuple
-      Precision values and recall values for the probability thresholds.
-      If compute_area is True, return the area under the PR curve as an extra element.
+    out : tuple
+        (precision_vals, recall_vals) for each probability threshold.
+        If compute_area is True, return (precision_vals, recall_vals, area),
+        where area is the trapezoidal estimate of the PR curve area.
     """
     precision_vals = []
     recall_vals = []