From cbb0abec62fe42ed12bee94424e359f1c63feb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Wed, 26 Mar 2025 13:19:42 +0000 Subject: [PATCH 1/3] Added environment variable capture functionality --- simvue/config/parameters.py | 1 + simvue/metadata.py | 21 ++++++++++++++++++++- simvue/run.py | 11 ++++++++++- tests/conftest.py | 1 + tests/functional/test_run_class.py | 23 ++++++++++++++++++++++- tests/unit/test_metadata.py | 25 ++++++++++++++++++++++++- 6 files changed, 78 insertions(+), 4 deletions(-) diff --git a/simvue/config/parameters.py b/simvue/config/parameters.py index 9e0b38bc..23770082 100644 --- a/simvue/config/parameters.py +++ b/simvue/config/parameters.py @@ -64,6 +64,7 @@ class DefaultRunSpecifications(pydantic.BaseModel): folder: str = pydantic.Field("/", pattern=sv_models.FOLDER_REGEX) metadata: dict[str, str | int | float | bool] | None = None mode: typing.Literal["offline", "disabled", "online"] = "online" + record_shell_vars: list[str] | None = None class ClientGeneralOptions(pydantic.BaseModel): diff --git a/simvue/metadata.py b/simvue/metadata.py index 0f63981c..eaeaae69 100644 --- a/simvue/metadata.py +++ b/simvue/metadata.py @@ -9,6 +9,8 @@ import contextlib import typing import json +import os +import fnmatch import toml import logging import pathlib @@ -179,7 +181,22 @@ def _node_js_env(repository: pathlib.Path) -> dict[str, typing.Any]: return js_meta -def environment(repository: pathlib.Path = pathlib.Path.cwd()) -> dict[str, typing.Any]: +def _environment_variables(glob_exprs: list[str]) -> dict[str, str]: + """Retrieve values for environment variables.""" + _env_vars: list[str] = list(os.environ.keys()) + _metadata: dict[str, str] = {} + + for pattern in glob_exprs: + for key in fnmatch.filter(_env_vars, pattern): + _metadata[key] = os.environ[key] + + return _metadata + + +def environment( + repository: pathlib.Path = pathlib.Path.cwd(), + env_var_glob_exprs: set[str] | None = None, +) -> dict[str, 
typing.Any]: """Retrieve environment metadata""" _environment_meta = {} if _python_meta := _python_env(repository): @@ -190,4 +207,6 @@ def environment(repository: pathlib.Path = pathlib.Path.cwd()) -> dict[str, typi _environment_meta["julia"] = _julia_meta if _js_meta := _node_js_env(repository): _environment_meta["javascript"] = _js_meta + if env_var_glob_exprs: + _environment_meta["shell"] = _environment_variables(env_var_glob_exprs) return _environment_meta diff --git a/simvue/run.py b/simvue/run.py index 355cd693..4772cc7f 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -613,6 +613,7 @@ def init( timeout: int | None = 180, visibility: typing.Literal["public", "tenant"] | list[str] | None = None, no_color: bool = False, + record_shell_vars: set[str] | None = None, ) -> bool: """Initialise a Simvue run @@ -650,6 +651,9 @@ def init( * A list of usernames with which to share this run no_color : bool, optional disable terminal colors. Default False. + record_shell_vars : list[str] | None, + list of environment variables to store as metadata, these can + either be defined as literal strings or globular expressions Returns ------- @@ -667,6 +671,7 @@ def init( folder = folder or self._user_config.run.folder name = name or self._user_config.run.name metadata = (metadata or {}) | (self._user_config.run.metadata or {}) + record_shell_vars = record_shell_vars or self._user_config.run.record_shell_vars self._term_color = not no_color @@ -734,7 +739,11 @@ def init( self._sv_obj.ttl = self._retention self._sv_obj.status = self._status self._sv_obj.tags = tags - self._sv_obj.metadata = (metadata or {}) | git_info(os.getcwd()) | environment() + self._sv_obj.metadata = ( + (metadata or {}) + | git_info(os.getcwd()) + | environment(env_var_glob_exprs=record_shell_vars) + ) self._sv_obj.heartbeat_timeout = timeout self._sv_obj.alerts = [] self._sv_obj.created = time.time() diff --git a/tests/conftest.py b/tests/conftest.py index 01c5cf4e..f0b37c10 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -139,6 +139,7 @@ def setup_test_run(run: sv_run.Run, create_objects: bool, request: pytest.Fixtur run.config(suppress_errors=False) run._heartbeat_interval = 1 + run.init( name=TEST_DATA['metadata']['test_identifier'], tags=TEST_DATA["tags"], diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index e493c5db..b248087c 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -1,6 +1,6 @@ import os from os.path import basename -from numpy import identity +from numpy import identity import pytest import pytest_mock import time @@ -1052,3 +1052,24 @@ def test_reconnect(mode, monkeypatch: pytest.MonkeyPatch) -> None: _reconnected_run = client.get_run(run_id) assert dict(_reconnected_run.metrics)["test_metric"]["last"] == 1 assert client.get_events(run_id)[0]["message"] == "Testing!" + + +@pytest.mark.run +def test_env_var_metadata() -> None: + # Add some environment variables to glob + _recorded_env = { + "SIMVUE_RUN_TEST_VAR_1": "1", + "SIMVUE_RUN_TEST_VAR_2": "hello" + } + os.environ.update(_recorded_env) + with simvue.Run() as run: + run.init( + name="test_env_var_metadata", + folder="/simvue_unit_testing", + retention_period="2 minutes", + timeout=None, + running=False, + record_shell_vars={"SIMVUE_RUN_TEST_VAR_*"} + ) + _recorded_meta = RunObject(identifier=run._id).metadata + assert all(key in _recorded_meta.get("shell") for key in _recorded_env) diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 5c454e14..4cae6072 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -1,3 +1,4 @@ +import os import pytest import pathlib import re @@ -51,4 +52,26 @@ def test_environment() -> None: assert metadata["python"]["project"]["name"] == "example-repo" assert metadata["rust"]["project"]["name"] == "example_project" assert metadata["julia"]["project"]["name"] == "Julia Demo Project" - assert 
metadata["javascript"]["project"]["name"] == "my-awesome-project" \ No newline at end of file + assert metadata["javascript"]["project"]["name"] == "my-awesome-project" + + +@pytest.mark.metadata +@pytest.mark.local +def test_slurm_env_var_capture() -> None: + _slurm_env = { + "SLURM_CPUS_PER_TASK": "2", + "SLURM_TASKS_PER_NODE": "1", + "SLURM_NNODES": "1", + "SLURM_NTASKS_PER_NODE": "1", + "SLURM_NTASKS": "1", + "SLURM_JOB_CPUS_PER_NODE": "2", + "SLURM_CPUS_ON_NODE": "2", + "SLURM_JOB_NUM_NODES": "1", + "SLURM_MEM_PER_NODE": "2000", + "SLURM_NPROCS": "1", + "SLURM_TRES_PER_TASK": "cpu:2", + } + os.environ.update(_slurm_env) + + _metadata = sv_meta.environment(env_var_glob_exprs={"SLURM_*"}) + assert all((key, value) in _metadata["shell"].items() for key, value in _slurm_env.items()) From 02170622f0cc19e58af1fbff990b1093c1c2aaf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Wed, 26 Mar 2025 13:21:10 +0000 Subject: [PATCH 2/3] Updated Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcffc1f4..dca6ecb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased * Added sorting to server queries, users can now specify to sort by columns during data retrieval from the database. +* Added ability to include environment variables within metadata for runs. ## [v2.0.1](https://github.com/simvue-io/client/releases/tag/v2.0.1) - 2025-03-24 * Improvements to docstrings on methods, classes and functions. 
## [v2.0.0](https://github.com/simvue-io/client/releases/tag/v2.0.0) - 2025-03-07 From 86938e9c2e0b98519731dfbca6e0067dc0f88d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Thu, 14 Aug 2025 09:50:39 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A7=AA=20Fix=20various=20test=20issue?= =?UTF-8?q?s,=20including=20special=20case=20for=20events=20and=20metrics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- simvue/api/objects/base.py | 13 ++++-- simvue/api/objects/events.py | 1 + simvue/api/objects/metrics.py | 1 + tests/functional/test_client.py | 49 +++++++++++++++----- tests/functional/test_executor.py | 4 +- tests/functional/test_run_class.py | 2 +- tests/functional/test_run_execute_process.py | 31 ++++++++----- tests/unit/test_file_artifact.py | 4 +- 8 files changed, 74 insertions(+), 31 deletions(-) diff --git a/simvue/api/objects/base.py b/simvue/api/objects/base.py index 65659bc5..cb178e44 100644 --- a/simvue/api/objects/base.py +++ b/simvue/api/objects/base.py @@ -184,6 +184,7 @@ def __init__( self._logger = logging.getLogger(f"simvue.{self.__class__.__name__}") self._label: str = getattr(self, "_label", self.__class__.__name__.lower()) self._read_only: bool = _read_only + self._is_set: bool = False self._endpoint: str = getattr(self, "_endpoint", f"{self._label}s") self._identifier: str | None = ( identifier if identifier is not None else f"offline_{uuid.uuid1()}" @@ -563,9 +564,15 @@ def _post(self, is_json: bool = True, **kwargs) -> dict[str, typing.Any]: if _id := _json_response.get("id"): self._logger.debug("'%s' created successfully", _id) self._identifier = _id - else: - _detail = _json_response.get("detail", "") - raise RuntimeError(f"Expected new ID for {self._label} but none found: {_detail}.") + elif not self._is_set: + _detail = _json_response.get("detail", _json_response) + + if not _detail: + _detail = "No information in JSON response." 
+ + raise RuntimeError( + f"Expected new ID for {self._label} but none found: {_detail}." + ) return _json_response diff --git a/simvue/api/objects/events.py b/simvue/api/objects/events.py index ff5b9e50..f786c99b 100644 --- a/simvue/api/objects/events.py +++ b/simvue/api/objects/events.py @@ -37,6 +37,7 @@ def __init__( self._label = "event" super().__init__(_read_only=_read_only, _local=_local, **kwargs) self._run_id = self._staging.get("run") + self._is_set = True @classmethod @pydantic.validate_call diff --git a/simvue/api/objects/metrics.py b/simvue/api/objects/metrics.py index 5e5a1988..fb9cea1e 100644 --- a/simvue/api/objects/metrics.py +++ b/simvue/api/objects/metrics.py @@ -39,6 +39,7 @@ def __init__( self._label = "metric" super().__init__(_read_only=_read_only, _local=_local, **kwargs) self._run_id = self._staging.get("run") + self._is_set = True @classmethod @pydantic.validate_call diff --git a/tests/functional/test_client.py b/tests/functional/test_client.py index 0abc9033..197bdcc8 100644 --- a/tests/functional/test_client.py +++ b/tests/functional/test_client.py @@ -148,6 +148,16 @@ def test_plot_metrics(create_test_run: tuple[sv_run.Run, dict]) -> None: pytest.skip("Plotting modules not found") client = svc.Client() + attempts: int = 0 + while ( + not list(client.get_metrics_names(create_test_run[1]["run_id"])) + and attempts < 10 + ): + time.sleep(1) + attempts += 1 + + if attempts >= 10: + raise AssertionError("Failed to retrieve metrics.") client.plot_metrics( run_ids=[create_test_run[1]["run_id"]], metric_names=list(create_test_run[1]["metrics"]), @@ -244,19 +254,34 @@ def test_get_runs( attributes: list[str] | None ) -> None: client = svc.Client() + attempts: int = 0 + + _result = None + while ( + _result is None + and attempts < 10 + ): + time.sleep(1) + try: + _result = client.get_runs( + filters=[], + output_format=output_format, + count_limit=10, + sort_by_columns=sorting, + timing_info=timing_info, + system_info=system_info, + 
metrics=metrics, + metadata=metadata, + alerts=alerts, + attributes=attributes + ) + except ObjectNotFoundError: + _result = None + attempts += 1 + + if attempts >= 10: + raise AssertionError("Failed to retrieve created runs.") - _result = client.get_runs( - filters=[], - output_format=output_format, - count_limit=10, - sort_by_columns=sorting, - timing_info=timing_info, - system_info=system_info, - metrics=metrics, - metadata=metadata, - alerts=alerts, - attributes=attributes - ) if output_format == "dataframe": assert not _result.empty diff --git a/tests/functional/test_executor.py b/tests/functional/test_executor.py index b0063764..0cf50b23 100644 --- a/tests/functional/test_executor.py +++ b/tests/functional/test_executor.py @@ -96,8 +96,8 @@ def callback(*_, evts=events, ident=i, **__): assert out_file.exists() assert triggers[i].is_set() for i in range(10): - os.remove(f"test_executor_multiprocess_cmd_{i}.err") - os.remove(f"test_executor_multiprocess_cmd_{i}.out") + os.remove(f"test_executor_multiprocess_cmd_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.err") + os.remove(f"test_executor_multiprocess_cmd_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out") with contextlib.suppress(ObjectNotFoundError): folder.delete(recursive=True, delete_runs=True) diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index 30a5b4b3..804f9bb6 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -1303,7 +1303,7 @@ def test_env_var_metadata() -> None: running=False, record_shell_vars={"SIMVUE_RUN_TEST_VAR_*"} ) - _recorded_meta = RunObject(identifier=run._id).metadata + _recorded_meta = RunObject(identifier=run.id).metadata assert all(key in _recorded_meta.get("shell") for key in _recorded_env) @pytest.mark.run diff --git a/tests/functional/test_run_execute_process.py b/tests/functional/test_run_execute_process.py index f1ca896c..86bc9d13 100644 --- a/tests/functional/test_run_execute_process.py +++ 
b/tests/functional/test_run_execute_process.py @@ -1,3 +1,4 @@ +import contextlib import pathlib import time import os @@ -5,7 +6,6 @@ import tempfile import pytest import filecmp -import simvue.sender as sv_send from simvue import Run, Client from simvue.sender import sender @@ -35,7 +35,7 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: for i in range(1, 3): _run.add_process(f"process_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}", executable="bash", script=temp_f.name) - assert _run.executor.get_command(f"process_{i}") == f"bash {temp_f.name}" + assert _run.executor.get_command(f"process_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}") == f"bash {temp_f.name}" time.sleep(3) @@ -44,7 +44,7 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: # Check that for when one of the processes has stopped _attempts: int = 0 - _first_out = next(pathlib.Path.cwd().glob("*process_*.out")) + _first_out = next(pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out")) while _first_out.stat().st_size == 0 and _attempts < 10: time.sleep(1) @@ -54,19 +54,20 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: raise AssertionError("Failed to terminate processes") # Check the Python process did not error - _out_err = pathlib.Path.cwd().glob("*process_*.err") + _out_err = pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.err") for file in _out_err: with file.open() as in_f: assert not in_f.readlines() # Now check the counter in the process was terminated # just beyond the sleep time - _out_files = pathlib.Path.cwd().glob("*process_*.out") + _out_files = pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out") for file in _out_files: with file.open() as in_f: assert (lines := in_f.readlines()) assert int(lines[0].strip()) < 4 - os.unlink(temp_f.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_f.name) 
def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: @@ -78,11 +79,16 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: """ run, _ = create_plain_run with tempfile.TemporaryDirectory() as temp_dir: - with tempfile.NamedTemporaryFile(delete=False, dir=temp_dir, suffix=".py") as temp_file: + with tempfile.NamedTemporaryFile( + delete=False, + dir=temp_dir, + prefix=os.environ.get("PYTEST_XDIST_WORKER", "0"), + suffix=".py" + ) as temp_file: with open(temp_file.name, "w") as out_f: out_f.writelines([ "import os\n", - "f = open('new_file.txt', 'w')\n", + f"f = open('new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt', 'w')\n", "f.write('Test Line')\n", "f.close()" ]) @@ -95,7 +101,7 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: cwd=temp_dir ) time.sleep(1) - run.save_file(os.path.join(temp_dir, "new_file.txt"), 'output') + run.save_file(os.path.join(temp_dir, f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt"), 'output') client = Client() @@ -104,8 +110,9 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: client.get_artifact_as_file(run_id, os.path.basename(temp_file.name), output_dir=os.path.join(temp_dir, "downloaded")) assert filecmp.cmp(os.path.join(temp_dir, "downloaded", os.path.basename(temp_file.name)), temp_file.name) - client.get_artifact_as_file(run_id, "new_file.txt", output_dir=os.path.join(temp_dir, "downloaded")) - with open(os.path.join(temp_dir, "downloaded", "new_file.txt"), "r") as new_file: + client.get_artifact_as_file(run_id, f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt", output_dir=os.path.join(temp_dir, "downloaded")) + with open(os.path.join(temp_dir, "downloaded", f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt"), "r") as new_file: assert new_file.read() == "Test Line" - os.unlink(temp_file.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_file.name) diff --git a/tests/unit/test_file_artifact.py 
b/tests/unit/test_file_artifact.py index aeb47eda..c442086c 100644 --- a/tests/unit/test_file_artifact.py +++ b/tests/unit/test_file_artifact.py @@ -1,3 +1,4 @@ +import contextlib import pytest import os import uuid @@ -46,7 +47,8 @@ def test_file_artifact_creation_online() -> None: assert _artifact.to_dict() _run.delete() _folder.delete(recursive=True, delete_runs=True, runs_only=False) - os.unlink(temp_f.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_f.name) if _failed: raise AssertionError("\n\t-" + "\n\t- ".join(": ".join(i) for i in _failed))