From d8030cc92c1fbfe7273a4d117949f7f676f4b613 Mon Sep 17 00:00:00 2001 From: Mathieu Dupont <108517594+mathieudpnt@users.noreply.github.com> Date: Thu, 26 Mar 2026 12:18:08 +0100 Subject: [PATCH] osekit update --- src/post_processing/utils/filtering_utils.py | 2 +- src/post_processing/utils/fpod_utils.py | 33 ++++++++++++-------- src/post_processing/utils/pamguard_utils.py | 19 ++++++----- tests/conftest.py | 2 +- tests/test_fpod_utils.py | 18 +++++------ 5 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index 46d486a..92384af 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -10,7 +10,7 @@ import numpy as np import pytz from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE -from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text +from osekit.utils.timestamp import strftime_osmose_format, strptime_from_text from pandas import ( DataFrame, Timedelta, diff --git a/src/post_processing/utils/fpod_utils.py b/src/post_processing/utils/fpod_utils.py index 08569c1..d108a10 100644 --- a/src/post_processing/utils/fpod_utils.py +++ b/src/post_processing/utils/fpod_utils.py @@ -22,7 +22,7 @@ sqrt, zeros, ) -from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text +from osekit.utils.timestamp import strftime_osmose_format, strptime_from_text from pandas import ( DataFrame, DateOffset, @@ -206,8 +206,9 @@ def get_feeding_buzz_datetime(row: Series) -> Timestamp: exceptions.append(e) try: - return (strptime_from_text(row["Minute"], "%-d/%-m/%Y %H:%M") + - Timedelta(microseconds=row["microsec"])) + return strptime_from_text(row["Minute"], "%-d/%-m/%Y %H:%M") + Timedelta( + microseconds=row["microsec"] + ) except (KeyError, TypeError, ValueError) as e: exceptions.append(e) @@ -347,10 +348,9 @@ def gmm_ici( """ df, ici = log_ici(df) - gmm = mixture.GaussianMixture(n_components=comp, - covariance_type="full", - random_state=42, - n_init=20) + gmm = mixture.GaussianMixture( + n_components=comp, covariance_type="full", random_state=42, n_init=20 + ) labels = gmm.fit_predict(ici) rank = argsort(argsort(gmm.means_.flatten())) @@ -416,7 +416,11 @@ def cluster_ici( gmm = mixture.GaussianMixture(n_components=comp, covariance_type="full") gmm.fit(ar_ici) - component_names = ["Buzz ICIs", "Regular ICIs", "Long ICIs",] + component_names = [ + "Buzz ICIs", + "Regular ICIs", + "Long ICIs", + ] cluster_info = [] for i in range(comp): means = sort(gmm.means_, axis=0)[i][0] @@ -512,10 +516,12 @@ def plot_gmm_ici( x_axis, gmm_icis.weights_[idx] * stats.norm.pdf( - x_axis, gmm_icis.means_[idx, 0], sqrt(gmm_icis.covariances_[idx, 0, 0]), + x_axis, + gmm_icis.means_[idx, 0], + sqrt(gmm_icis.covariances_[idx, 0, 0]), ).ravel(), label=f"(μ={gmm_icis.means_[idx, 0]:.2f}," - f"σ={sqrt(gmm_icis.covariances_[idx, 0, 0]):.2f})", + f"σ={sqrt(gmm_icis.covariances_[idx, 0, 0]):.2f})", ) lines += [line] (mixture_line,) = ax.plot( @@ -640,8 +646,7 @@ def percent_calc( # Aggregate and compute metrics df = ( - data - .groupby(time_unit) + data.groupby(time_unit) .agg( { "DPh": "sum", @@ -656,7 +661,9 @@ def percent_calc( df["%click"] = df["dpm_count"] * 100 / (df["Day"] * 60) df["%DPh"] = df["DPh"] * 100 / df["Day"] df["FBR"] = df.apply( - lambda row: (row["Foraging"] * 100 / row["dpm_count"]) if row["dpm_count"] > 0 else 0, + lambda row: (row["Foraging"] * 100 / row["dpm_count"]) + if row["dpm_count"] > 0 + else 0, axis=1, ) df["%buzzes"] = df["Foraging"] * 100 / (df["Day"] * 60) diff --git a/src/post_processing/utils/pamguard_utils.py b/src/post_processing/utils/pamguard_utils.py index 02719ab..d595835 100644 --- a/src/post_processing/utils/pamguard_utils.py +++ b/src/post_processing/utils/pamguard_utils.py @@ -1,7 +1,7 @@ from pathlib import Path -from osekit.core_api.audio_data import AudioData -from osekit.utils.timestamp_utils import strftime_osmose_format +from osekit.core.audio_data import AudioData +from osekit.utils.timestamp import strftime_osmose_format from pandas import DataFrame, Timedelta, Timestamp from pypamguard import load_pamguard_binary_folder from pypamguard.core.filters import DateFilter, Filters @@ -11,11 +11,12 @@ logger.set_verbosity(Verbosity.ERROR) -def process_binary(audio: AudioData, - binary: Path, - dataset: str, - annotation: str, - ) -> DataFrame: +def process_binary( + audio: AudioData, + binary: Path, + dataset: str, + annotation: str, +) -> DataFrame: r"""Process PAMGuard binary files into APLOSE DataFrame. Parameters @@ -64,7 +65,9 @@ def process_binary(audio: AudioData, """ filter_obj = Filters( { - "daterange": DateFilter(start_date=audio.begin, end_date=audio.end, ordered=True), + "daterange": DateFilter( + start_date=audio.begin, end_date=audio.end, ordered=True + ), }, ) diff --git a/tests/conftest.py b/tests/conftest.py index 3d8998e..adfa6a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ import pytest import soundfile as sf import yaml -from osekit.utils.timestamp_utils import strftime_osmose_format +from osekit.utils.timestamp import strftime_osmose_format from pandas import DataFrame, read_csv from pandas.tseries import frequencies diff --git a/tests/test_fpod_utils.py b/tests/test_fpod_utils.py index d1c4ece..51a4deb 100644 --- a/tests/test_fpod_utils.py +++ b/tests/test_fpod_utils.py @@ -1,4 +1,5 @@ """FPOD/ CPOD processing functions tests.""" + import pytest import pytz from pandas import DataFrame @@ -96,7 +97,7 @@ # # return data.reset_index(drop=True) -#@pytest.fixture(scope="module") +# @pytest.fixture(scope="module") # @dt.working_directory(__file__) # def df_raw() -> DataFrame: # return read_csv("pod_raw.csv") @@ -106,7 +107,7 @@ # def df_ap() -> DataFrame: # return read_csv("pod_aplose.csv") -#@pytest.mark.mandatory +# @pytest.mark.mandatory # def test_columns(df_raw: DataFrame) -> None: # dt.validate( # df_raw.columns, @@ -142,7 +143,7 @@ def test_csv_folder_single_file(tmp_path) -> None: csv_file = tmp_path / "data.csv" csv_file.write_text("col1;col2\nval1;val2\nval3;val4", encoding="latin-1") - result = load_pod_folder(tmp_path) + result = load_pod_folder(tmp_path, "csv") assert isinstance(result, DataFrame) assert len(result) == 2 @@ -453,20 +454,17 @@ def test_pod2aplose_index_reset(timezone): """Test that index is properly reset after sorting.""" df = DataFrame({ "ChunkEnd": ["15/01/2024 12:00", "15/01/2024 10:00"], - "deploy.name": ["d1", "d2"] + "deploy.name": ["d1", "d2"], }) result = pod2aplose( - df=df, - tz=timezone, - dataset_name="dataset", - annotation="click", - annotator="john" + df=df, tz=timezone, dataset_name="dataset", annotation="click", annotator="john" ) # Index should be 0, 1 after reset assert result.index.tolist() == [0, 1] + # meta_cut_aplose @@ -596,4 +594,4 @@ def test_pod2aplose_index_reset(timezone): # result = deploy_period(df) # assert_frame_equal(result, expected) -# actual_data \ No newline at end of file +# actual_data