Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/post_processing/utils/filtering_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import pytz
from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE
from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text
from osekit.utils.timestamp import strftime_osmose_format, strptime_from_text
from pandas import (
DataFrame,
Timedelta,
Expand Down
33 changes: 20 additions & 13 deletions src/post_processing/utils/fpod_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
sqrt,
zeros,
)
from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text
from osekit.utils.timestamp import strftime_osmose_format, strptime_from_text
from pandas import (
DataFrame,
DateOffset,
Expand Down Expand Up @@ -206,8 +206,9 @@ def get_feeding_buzz_datetime(row: Series) -> Timestamp:
exceptions.append(e)

try:
return (strptime_from_text(row["Minute"], "%-d/%-m/%Y %H:%M") +
Timedelta(microseconds=row["microsec"]))
return strptime_from_text(row["Minute"], "%-d/%-m/%Y %H:%M") + Timedelta(
microseconds=row["microsec"]
)
except (KeyError, TypeError, ValueError) as e:
exceptions.append(e)

Expand Down Expand Up @@ -347,10 +348,9 @@ def gmm_ici(
"""
df, ici = log_ici(df)

gmm = mixture.GaussianMixture(n_components=comp,
covariance_type="full",
random_state=42,
n_init=20)
gmm = mixture.GaussianMixture(
n_components=comp, covariance_type="full", random_state=42, n_init=20
)
labels = gmm.fit_predict(ici)

rank = argsort(argsort(gmm.means_.flatten()))
Expand Down Expand Up @@ -416,7 +416,11 @@ def cluster_ici(
gmm = mixture.GaussianMixture(n_components=comp, covariance_type="full")
gmm.fit(ar_ici)

component_names = ["Buzz ICIs", "Regular ICIs", "Long ICIs",]
component_names = [
"Buzz ICIs",
"Regular ICIs",
"Long ICIs",
]
cluster_info = []
for i in range(comp):
means = sort(gmm.means_, axis=0)[i][0]
Expand Down Expand Up @@ -512,10 +516,12 @@ def plot_gmm_ici(
x_axis,
gmm_icis.weights_[idx]
* stats.norm.pdf(
x_axis, gmm_icis.means_[idx, 0], sqrt(gmm_icis.covariances_[idx, 0, 0]),
x_axis,
gmm_icis.means_[idx, 0],
sqrt(gmm_icis.covariances_[idx, 0, 0]),
).ravel(),
label=f"(μ={gmm_icis.means_[idx, 0]:.2f},"
f"σ={sqrt(gmm_icis.covariances_[idx, 0, 0]):.2f})",
f"σ={sqrt(gmm_icis.covariances_[idx, 0, 0]):.2f})",
)
lines += [line]
(mixture_line,) = ax.plot(
Expand Down Expand Up @@ -640,8 +646,7 @@ def percent_calc(

# Aggregate and compute metrics
df = (
data
.groupby(time_unit)
data.groupby(time_unit)
.agg(
{
"DPh": "sum",
Expand All @@ -656,7 +661,9 @@ def percent_calc(
df["%click"] = df["dpm_count"] * 100 / (df["Day"] * 60)
df["%DPh"] = df["DPh"] * 100 / df["Day"]
df["FBR"] = df.apply(
lambda row: (row["Foraging"] * 100 / row["dpm_count"]) if row["dpm_count"] > 0 else 0,
lambda row: (row["Foraging"] * 100 / row["dpm_count"])
if row["dpm_count"] > 0
else 0,
axis=1,
)
df["%buzzes"] = df["Foraging"] * 100 / (df["Day"] * 60)
Expand Down
19 changes: 11 additions & 8 deletions src/post_processing/utils/pamguard_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path

from osekit.core_api.audio_data import AudioData
from osekit.utils.timestamp_utils import strftime_osmose_format
from osekit.core.audio_data import AudioData
from osekit.utils.timestamp import strftime_osmose_format
from pandas import DataFrame, Timedelta, Timestamp
from pypamguard import load_pamguard_binary_folder
from pypamguard.core.filters import DateFilter, Filters
Expand All @@ -11,11 +11,12 @@
logger.set_verbosity(Verbosity.ERROR)


def process_binary(audio: AudioData,
binary: Path,
dataset: str,
annotation: str,
) -> DataFrame:
def process_binary(
audio: AudioData,
binary: Path,
dataset: str,
annotation: str,
) -> DataFrame:
r"""Process PAMGuard binary files into APLOSE DataFrame.

Parameters
Expand Down Expand Up @@ -64,7 +65,9 @@ def process_binary(audio: AudioData,
"""
filter_obj = Filters(
{
"daterange": DateFilter(start_date=audio.begin, end_date=audio.end, ordered=True),
"daterange": DateFilter(
start_date=audio.begin, end_date=audio.end, ordered=True
),
},
)

Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
import soundfile as sf
import yaml
from osekit.utils.timestamp_utils import strftime_osmose_format
from osekit.utils.timestamp import strftime_osmose_format
from pandas import DataFrame, read_csv
from pandas.tseries import frequencies

Expand Down
18 changes: 8 additions & 10 deletions tests/test_fpod_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""FPOD/ CPOD processing functions tests."""

import pytest
import pytz
from pandas import DataFrame
Expand Down Expand Up @@ -96,7 +97,7 @@
#
# return data.reset_index(drop=True)

#@pytest.fixture(scope="module")
# @pytest.fixture(scope="module")
# @dt.working_directory(__file__)
# def df_raw() -> DataFrame:
# return read_csv("pod_raw.csv")
Expand All @@ -106,7 +107,7 @@
# def df_ap() -> DataFrame:
# return read_csv("pod_aplose.csv")

#@pytest.mark.mandatory
# @pytest.mark.mandatory
# def test_columns(df_raw: DataFrame) -> None:
# dt.validate(
# df_raw.columns,
Expand Down Expand Up @@ -142,7 +143,7 @@ def test_csv_folder_single_file(tmp_path) -> None:
csv_file = tmp_path / "data.csv"
csv_file.write_text("col1;col2\nval1;val2\nval3;val4", encoding="latin-1")

result = load_pod_folder(tmp_path)
result = load_pod_folder(tmp_path, "csv")

assert isinstance(result, DataFrame)
assert len(result) == 2
Expand Down Expand Up @@ -453,20 +454,17 @@ def test_pod2aplose_index_reset(timezone):
"""Test that index is properly reset after sorting."""
df = DataFrame({
"ChunkEnd": ["15/01/2024 12:00", "15/01/2024 10:00"],
"deploy.name": ["d1", "d2"]
"deploy.name": ["d1", "d2"],
})

result = pod2aplose(
df=df,
tz=timezone,
dataset_name="dataset",
annotation="click",
annotator="john"
df=df, tz=timezone, dataset_name="dataset", annotation="click", annotator="john"
)

# Index should be 0, 1 after reset
assert result.index.tolist() == [0, 1]


# meta_cut_aplose


Expand Down Expand Up @@ -596,4 +594,4 @@ def test_pod2aplose_index_reset(timezone):
# result = deploy_period(df)
# assert_frame_equal(result, expected)

# actual_data
# actual_data
Loading