diff --git a/backend/storage/resolvers/_osekit.py b/backend/storage/resolvers/_osekit.py index 806531bba..75fb9922a 100644 --- a/backend/storage/resolvers/_osekit.py +++ b/backend/storage/resolvers/_osekit.py @@ -1,10 +1,10 @@ -import json from pathlib import PureWindowsPath, Path from metadatax.data.models import FileFormat +from osekit.core_api.spectro_dataset import SpectroDataset +from osekit.public_api.dataset import Dataset as OSEkitDataset from backend.api.models import SpectrogramAnalysis, Dataset, Colormap, FFT, Spectrogram -from backend.storage.exceptions import AnalysisNotFoundException from backend.storage.types import ( FailedItem, ) @@ -15,56 +15,60 @@ make_absolute_server, make_static_url, clean_path, - open_file, ) -# from osekit.core_api.spectro_dataset import SpectroDataset -# from osekit.public_api.dataset import Dataset as OSEkitDataset -from backend.utils.osekit_replace import SpectroDataset, OSEkitDataset +# from backend.utils.osekit_replace import SpectroDataset, OSEkitDataset from ._legacy_osekit import LegacyOSEkitResolver class OSEkitResolver(LegacyOSEkitResolver): + + # Utils + + def _get_osekit_dataset(self, path: str) -> OSEkitDataset | None: + json_path = join(path, "dataset.json") + if exists(json_path): + return OSEkitDataset.from_json(Path(make_absolute_server(json_path))) + return None + + # Implements + def _get_dataset_for_path( self, path: str | None = None ) -> Dataset | FailedItem | None: # pylint: disable=broad-exception-caught - json_path = join(path, "dataset.json") - if exists(json_path): - try: - with open_file(json_path) as f: - json.loads(f.read()) - return Dataset( - name=PureWindowsPath(path).name, - path=make_path_relative(path), - ) - except Exception as e: - return FailedItem(path=path, error=e) - return super()._get_dataset_for_path(path=path) + try: + dataset = self._get_osekit_dataset(path) + if not dataset: + return super()._get_dataset_for_path(path=path) + return Dataset( + name=PureWindowsPath(path).name, + path=make_path_relative(path), + ) + except Exception as e: + return FailedItem(path=path, error=e) def _get_all_analysis_for_dataset( self, dataset: Dataset, detailed: bool = False ) -> list[SpectrogramAnalysis]: - json_path = join(dataset.path, "dataset.json") - if not exists(json_path): + osekit_dataset = self._get_osekit_dataset(dataset.path) + if not osekit_dataset: return super()._get_all_analysis_for_dataset(dataset=dataset) analysis: list[SpectrogramAnalysis] = [] - with open_file(json_path) as f: - d = json.loads(f.read()) - for _name, info in d["datasets"].items(): - if info["class"] != SpectroDataset.__name__: - continue - analysis.append( - self._get_analysis( - dataset=dataset, - relative_path=make_path_relative( - PureWindowsPath(info["json"]).parent.as_posix(), - to=dataset.path, - ), - detailed=detailed, - ) + for _name, info in osekit_dataset.datasets.items(): + if info["class"] != SpectroDataset.__name__: + continue + analysis.append( + self._get_analysis( + dataset=dataset, + relative_path=make_path_relative( + PureWindowsPath(info["dataset"]).parent.as_posix(), + to=dataset.path, + ), + detailed=detailed, ) + ) return analysis def _get_analysis( @@ -74,45 +78,32 @@ def _get_analysis( return super()._get_analysis( dataset=dataset, relative_path=relative_path, detailed=detailed ) + osekit_dataset = self._get_osekit_dataset(dataset.path) spectro_dataset: SpectroDataset | None = None - json_path = join(dataset.path, "dataset.json") - if exists(json_path): - with open_file(json_path) as f: - d = json.loads(f.read()) - for _name, info in d["datasets"].items(): - if info["class"] != SpectroDataset.__name__: - continue - path = make_path_relative( - PureWindowsPath(info["json"]).parent.as_posix(), - to=dataset.path, + if not osekit_dataset: + return None + for _name, info in osekit_dataset.datasets.items(): + if info["class"] != SpectroDataset.__name__: + continue + path = make_path_relative( + PureWindowsPath(info["dataset"]).parent.as_posix(), + to=dataset.path, + ) + if path == relative_path: + if not detailed: + return SpectrogramAnalysis( + name=PureWindowsPath(relative_path).name, + path=relative_path, ) - if path == relative_path: - if not detailed: - return SpectrogramAnalysis( - name=PureWindowsPath(relative_path).name, - path=relative_path, - ) - spectro_dataset = SpectroDataset.from_json( - Path( - make_absolute_server( - join( - dataset.path, - make_path_relative( - info["json"], to=dataset.path - ), - ) - ) - ) - ) + spectro_dataset = osekit_dataset.get_dataset(info["analysis"]) + if spectro_dataset is None: return None if not detailed: - return SpectrogramAnalysis( - name=PureWindowsPath(relative_path).name, path=relative_path - ) + return SpectrogramAnalysis(name=spectro_dataset.name, path=relative_path) return SpectrogramAnalysis( - name=PureWindowsPath(relative_path).name, + name=spectro_dataset.name, path=relative_path, start=spectro_dataset.begin, end=spectro_dataset.end, @@ -137,19 +128,7 @@ def __get_spectro_dataset( if not exists(json_path): return None osekit_dataset = OSEkitDataset.from_json(Path(make_absolute_server(json_path))) - - sd: list[SpectroDataset] = [ - d["dataset"] - for d in osekit_dataset.datasets.values() - if d["class"] == SpectroDataset.__name__ - and make_path_relative( - d["dataset"].folder, to=clean_path(osekit_dataset.folder) - ) - == analysis.path - ] - if len(sd) == 0: - raise AnalysisNotFoundException(analysis.path) - return sd[0] + return osekit_dataset.get_dataset(analysis.name) def get_all_spectrograms_for_analysis( self, analysis: SpectrogramAnalysis diff --git a/backend/utils/osekit_replace.py b/backend/utils/osekit_replace.py deleted file mode 100644 index 2999989d8..000000000 --- a/backend/utils/osekit_replace.py +++ /dev/null @@ -1,239 +0,0 @@ -import json -from os.path import join -from pathlib import PureWindowsPath, Path -from typing import TypedDict - -import numpy as np -from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES -from osekit.utils.timestamp_utils import strptime_from_text -from pandas import Timestamp, Timedelta -from scipy.signal import ShortTimeFFT - -from backend.storage.utils import make_path_relative, clean_path, make_absolute_server - - -class TFile: - begin: Timestamp - end: Timestamp - path: str - - def __init__(self, begin: Timestamp, end: Timestamp, path: str): - self.begin = begin - self.end = end - self.path = path - - -class AudioData: - files: list[TFile] - - def __init__(self, files: list[TFile]): - self.files = files - - -class SpectroData: - name: str - v_lim: tuple[float, float] - begin: Timestamp - end: Timestamp - duration: Timedelta - audio_data: AudioData - files: list[TFile] - - def __init__( - self, - name: str, - begin: Timestamp, - end: Timestamp, - v_lim: tuple[float, float], - audio_data: AudioData, - files: list[TFile], - ): - self.name = name - self.begin = begin - self.end = end - self.v_lim = v_lim - self.duration = self.end - self.begin - self.audio_data = audio_data - self.files = files - - -class SpectroDataset: - # pylint: disable=too-many-instance-attributes - folder: Path - name: str - fft: ShortTimeFFT | None - colormap: str | None - - data: list[SpectroData] - data_duration: Timedelta - begin: Timestamp - end: Timestamp - - __name__ = "type" - - def __init__( - self, - folder: Path, - name: str, - fft: ShortTimeFFT | None, - colormap: str | None, - data: list[SpectroData], - ): - self.name = name - self.folder = folder - self.fft = fft - self.colormap = colormap - self.data = data - self.begin = min(d.begin for d in data) - self.end = max(d.end for d in data) - data_durations = [Timedelta(d.duration).round(freq="1s") for d in data] - self.data_duration = max(set(data_durations), key=data_durations.count) - - @staticmethod - def from_json(json_path: Path) -> "SpectroDataset": - with json_path.open("r") as f: - dataset_data = json.loads(f.read()) - sft = ( - list(dataset_data["sft"].values())[0] - if "sft" in dataset_data and dataset_data["sft"] - else None - ) - folder = Path( - PureWindowsPath(json_path).as_posix()[ - : -len(f"/{json_path.stem}{json_path.suffix}") - ] - ) - all_spectro_data = [] - for spectro_name, spectro_data in dataset_data["data"].items(): - audio_files = [] - spectro_files = [] - for _name, file in spectro_data["files"].items(): - spectro_files.append( - TFile( - path=join( - folder.parent.parent, - make_path_relative( - file["path"], to=folder.parent.parent - ), - ), - begin=Timestamp( - strptime_from_text( - file["begin"], - datetime_template=TIMESTAMP_FORMATS_EXPORTED_FILES, - ) - ), - end=Timestamp( - strptime_from_text( - file["end"], - datetime_template=TIMESTAMP_FORMATS_EXPORTED_FILES, - ) - ), - ) - ) - for _name, file in spectro_data["audio_data"]["files"].items(): - audio_files.append( - TFile( - path=join( - folder.parent.parent, - make_path_relative( - Path(folder, file["path"]).resolve() - if ".." in file["path"] - else file["path"], - to=folder.parent.parent, - ), - ), - begin=Timestamp( - strptime_from_text( - file["begin"], - datetime_template=TIMESTAMP_FORMATS_EXPORTED_FILES, - ) - ), - end=Timestamp( - strptime_from_text( - file["end"], - datetime_template=TIMESTAMP_FORMATS_EXPORTED_FILES, - ) - ), - ) - ) - all_spectro_data.append( - SpectroData( - name=spectro_name, - begin=Timestamp(spectro_data["begin"]), - end=Timestamp(spectro_data["end"]), - v_lim=spectro_data["v_lim"], - audio_data=AudioData(files=audio_files), - files=spectro_files, - ) - ) - - return SpectroDataset( - folder=folder, - name=dataset_data["name"], - fft=ShortTimeFFT( - win=np.array(sft["win"]), - hop=sft["hop"], - fs=sft["fs"], - mfft=sft["mfft"], - ) - if sft - else None, - colormap=list(dataset_data["data"].values())[0]["colormap"], - data=all_spectro_data, - ) - - @property - def v_lim(self) -> tuple[float, float] | None: - """Return the most frequent v_lim of the spectro dataset.""" - v_lims = [d.v_lim for d in self.data] - return max(v_lims, key=v_lims.count) - - -class OSEkitDataset: - datasets: dict[ - str, - type[ - TypedDict( - "", - { - "class": str, - "analysis": str, - "dataset": SpectroDataset, - }, - ) - ], - ] - folder: Path - - def __init__(self, folder: Path, datasets: dict): - self.datasets = datasets - self.folder = folder - - @staticmethod - def from_json(json_path: Path) -> "OSEkitDataset": - with json_path.open("r") as f: - base_path = clean_path(json_path)[: -len("dataset.json")] - d = json.loads(f.read()) - datasets = {} - for name, dataset in d["datasets"].items(): - if dataset["class"] != SpectroDataset.__name__: - continue - datasets[name] = { - "class": dataset["class"], - "analysis": dataset["analysis"], - "dataset": SpectroDataset.from_json( - Path( - join( - base_path, - make_path_relative( - dataset["json"], to=PureWindowsPath(base_path).stem - ), - ) - ) - ), - } - - return OSEkitDataset( - folder=Path(make_absolute_server(make_path_relative(base_path))), - datasets=datasets, - ) diff --git a/poetry.lock b/poetry.lock index b0ebce3ff..2063bea28 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2478,15 +2478,13 @@ files = [ [[package]] name = "osekit" -version = "0.4.1" +version = "0.4.3" description = "OSEkit" optional = false python-versions = ">=3.12" groups = ["main"] -files = [ - {file = "osekit-0.4.1-py3-none-any.whl", hash = "sha256:4df3aba82b8299b2d1e482f83cf9ef06b613bdf9ba7cb84468d469920754333e"}, - {file = "osekit-0.4.1.tar.gz", hash = "sha256:4b1ee9b7e9194f7da8925889305f024b8bacdfa98e8c3c0f7538e09c1cb8e0fe"}, -] +files = [] +develop = false [package.dependencies] matplotlib = ">=3.10.7" @@ -2494,13 +2492,20 @@ notebook = ">=7.4.4" numpy = ">=2.2.6" pandas = ">=2.2.3" pytest = ">=8.3.5" +pytz = ">=2025.2" pyyaml = ">=6.0.2" scipy = ">=1.15.3" soundfile = ">=0.13.1" soxr = ">=0.5.0.post1" -tomlkit = ">=0.13.2" tqdm = ">=4.67.1" +[package.extras] +mseed = ["obspy"] + +[package.source] +type = "directory" +url = "../OSEkit" + [[package]] name = "packaging" version = "24.2" @@ -3184,14 +3189,14 @@ dev = ["backports.zoneinfo ; python_version < \"3.9\"", "black", "build", "freez [[package]] name = "pytz" -version = "2024.2" +version = "2026.1.post1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" groups = ["main", "dev"] files = [ - {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, - {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, + {file = "pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a"}, + {file = "pytz-2026.1.post1.tar.gz", hash = "sha256:3378dde6a0c3d26719182142c56e60c7f9af7e968076f31aae569d72a0358ee1"}, ] [[package]] @@ -3831,7 +3836,7 @@ version = "0.13.2" description = "Style preserving TOML library" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["dev"] files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, @@ -4076,4 +4081,4 @@ brotli = ["brotli"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "aff12d8c28e4ee43af0ee5d8972fcab00b38b2ea84fc3bc8a5fe755d05072eab" +content-hash = "399aa3f1b78f7a3162a9b053ad8253250d4b8b46fbb9ab4a6b3636a087d9d4b6" diff --git a/pyproject.toml b/pyproject.toml index 6ff5bb25c..9e9d6ec82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,8 @@ drf-spectacular = "^0.26.0" django-tinymce = "^3.6.1" python-dateutil = ">=2.4" whitenoise = "^6.9.0" -osekit = "^0.4.1" -#osekit = {path = "./OSEkit", develop=true} +#osekit = "^0.4.1" +osekit = {path = "../OSEkit"} graphene-django = "^3.2.3" graphene-django-pagination = {url = "https://github.com/instruct-br/graphene-django-pagination/archive/master.zip"} django-filter = ">=2"