From cbb0abec62fe42ed12bee94424e359f1c63feb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Wed, 26 Mar 2025 13:19:42 +0000 Subject: [PATCH 1/3] Added environment variable capture functionality --- simvue/config/parameters.py | 1 + simvue/metadata.py | 21 ++++++++++++++++++++- simvue/run.py | 11 ++++++++++- tests/conftest.py | 1 + tests/functional/test_run_class.py | 23 ++++++++++++++++++++++- tests/unit/test_metadata.py | 25 ++++++++++++++++++++++++- 6 files changed, 78 insertions(+), 4 deletions(-) diff --git a/simvue/config/parameters.py b/simvue/config/parameters.py index 9e0b38bc..23770082 100644 --- a/simvue/config/parameters.py +++ b/simvue/config/parameters.py @@ -64,6 +64,7 @@ class DefaultRunSpecifications(pydantic.BaseModel): folder: str = pydantic.Field("/", pattern=sv_models.FOLDER_REGEX) metadata: dict[str, str | int | float | bool] | None = None mode: typing.Literal["offline", "disabled", "online"] = "online" + record_shell_vars: list[str] | None = None class ClientGeneralOptions(pydantic.BaseModel): diff --git a/simvue/metadata.py b/simvue/metadata.py index 0f63981c..eaeaae69 100644 --- a/simvue/metadata.py +++ b/simvue/metadata.py @@ -9,6 +9,8 @@ import contextlib import typing import json +import os +import fnmatch import toml import logging import pathlib @@ -179,7 +181,22 @@ def _node_js_env(repository: pathlib.Path) -> dict[str, typing.Any]: return js_meta -def environment(repository: pathlib.Path = pathlib.Path.cwd()) -> dict[str, typing.Any]: +def _environment_variables(glob_exprs: list[str]) -> dict[str, str]: + """Retrieve values for environment variables.""" + _env_vars: list[str] = list(os.environ.keys()) + _metadata: dict[str, str] = {} + + for pattern in glob_exprs: + for key in fnmatch.filter(_env_vars, pattern): + _metadata[key] = os.environ[key] + + return _metadata + + +def environment( + repository: pathlib.Path = pathlib.Path.cwd(), + env_var_glob_exprs: set[str] | None = None, +) -> dict[str, 
typing.Any]: """Retrieve environment metadata""" _environment_meta = {} if _python_meta := _python_env(repository): @@ -190,4 +207,6 @@ def environment(repository: pathlib.Path = pathlib.Path.cwd()) -> dict[str, typi _environment_meta["julia"] = _julia_meta if _js_meta := _node_js_env(repository): _environment_meta["javascript"] = _js_meta + if env_var_glob_exprs: + _environment_meta["shell"] = _environment_variables(env_var_glob_exprs) return _environment_meta diff --git a/simvue/run.py b/simvue/run.py index 355cd693..4772cc7f 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -613,6 +613,7 @@ def init( timeout: int | None = 180, visibility: typing.Literal["public", "tenant"] | list[str] | None = None, no_color: bool = False, + record_shell_vars: set[str] | None = None, ) -> bool: """Initialise a Simvue run @@ -650,6 +651,9 @@ def init( * A list of usernames with which to share this run no_color : bool, optional disable terminal colors. Default False. + record_shell_vars : list[str] | None, + list of environment variables to store as metadata, these can + either be defined as literal strings or globular expressions Returns ------- @@ -667,6 +671,7 @@ def init( folder = folder or self._user_config.run.folder name = name or self._user_config.run.name metadata = (metadata or {}) | (self._user_config.run.metadata or {}) + record_shell_vars = record_shell_vars or self._user_config.run.record_shell_vars self._term_color = not no_color @@ -734,7 +739,11 @@ def init( self._sv_obj.ttl = self._retention self._sv_obj.status = self._status self._sv_obj.tags = tags - self._sv_obj.metadata = (metadata or {}) | git_info(os.getcwd()) | environment() + self._sv_obj.metadata = ( + (metadata or {}) + | git_info(os.getcwd()) + | environment(env_var_glob_exprs=record_shell_vars) + ) self._sv_obj.heartbeat_timeout = timeout self._sv_obj.alerts = [] self._sv_obj.created = time.time() diff --git a/tests/conftest.py b/tests/conftest.py index 01c5cf4e..f0b37c10 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -139,6 +139,7 @@ def setup_test_run(run: sv_run.Run, create_objects: bool, request: pytest.Fixtur run.config(suppress_errors=False) run._heartbeat_interval = 1 + run.init( name=TEST_DATA['metadata']['test_identifier'], tags=TEST_DATA["tags"], diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index e493c5db..b248087c 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -1,6 +1,6 @@ import os from os.path import basename -from numpy import identity +from numpy import identity import pytest import pytest_mock import time @@ -1052,3 +1052,24 @@ def test_reconnect(mode, monkeypatch: pytest.MonkeyPatch) -> None: _reconnected_run = client.get_run(run_id) assert dict(_reconnected_run.metrics)["test_metric"]["last"] == 1 assert client.get_events(run_id)[0]["message"] == "Testing!" + + +@pytest.mark.run +def test_env_var_metadata() -> None: + # Add some environment variables to glob + _recorded_env = { + "SIMVUE_RUN_TEST_VAR_1": "1", + "SIMVUE_RUN_TEST_VAR_2": "hello" + } + os.environ.update(_recorded_env) + with simvue.Run() as run: + run.init( + name="test_env_var_metadata", + folder="/simvue_unit_testing", + retention_period="2 minutes", + timeout=None, + running=False, + record_shell_vars={"SIMVUE_RUN_TEST_VAR_*"} + ) + _recorded_meta = RunObject(identifier=run._id).metadata + assert all(key in _recorded_meta.get("shell") for key in _recorded_env) diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 5c454e14..4cae6072 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -1,3 +1,4 @@ +import os import pytest import pathlib import re @@ -51,4 +52,26 @@ def test_environment() -> None: assert metadata["python"]["project"]["name"] == "example-repo" assert metadata["rust"]["project"]["name"] == "example_project" assert metadata["julia"]["project"]["name"] == "Julia Demo Project" - assert 
metadata["javascript"]["project"]["name"] == "my-awesome-project" \ No newline at end of file + assert metadata["javascript"]["project"]["name"] == "my-awesome-project" + + +@pytest.mark.metadata +@pytest.mark.local +def test_slurm_env_var_capture() -> None: + _slurm_env = { + "SLURM_CPUS_PER_TASK": "2", + "SLURM_TASKS_PER_NODE": "1", + "SLURM_NNODES": "1", + "SLURM_NTASKS_PER_NODE": "1", + "SLURM_NTASKS": "1", + "SLURM_JOB_CPUS_PER_NODE": "2", + "SLURM_CPUS_ON_NODE": "2", + "SLURM_JOB_NUM_NODES": "1", + "SLURM_MEM_PER_NODE": "2000", + "SLURM_NPROCS": "1", + "SLURM_TRES_PER_TASK": "cpu:2", + } + os.environ.update(_slurm_env) + + _metadata = sv_meta.environment(env_var_glob_exprs={"SLURM_*"}) + assert all((key, value) in _metadata["shell"].items() for key, value in _slurm_env.items()) From 02170622f0cc19e58af1fbff990b1093c1c2aaf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Wed, 26 Mar 2025 13:21:10 +0000 Subject: [PATCH 2/3] Updated Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fcffc1f4..dca6ecb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased * Added sorting to server queries, users can now specify to sort by columns during data retrieval from the database. +* Added ability to include environment variables within metadata for runs. ## [v2.0.1](https://github.com/simvue-io/client/releases/tag/v2.0.1) - 2025-03-24 * Improvements to docstrings on methods, classes and functions. 
## [v2.0.0](https://github.com/simvue-io/client/releases/tag/v2.0.0) - 2025-03-07 From 86938e9c2e0b98519731dfbca6e0067dc0f88d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20Zar=C4=99bski?= Date: Thu, 14 Aug 2025 09:50:39 +0100 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A7=AA=20Fix=20various=20test=20issue?= =?UTF-8?q?s,=20including=20special=20case=20for=20events=20and=20metrics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- simvue/api/objects/base.py | 13 ++++-- simvue/api/objects/events.py | 1 + simvue/api/objects/metrics.py | 1 + tests/functional/test_client.py | 49 +++++++++++++++----- tests/functional/test_executor.py | 4 +- tests/functional/test_run_class.py | 2 +- tests/functional/test_run_execute_process.py | 31 ++++++++----- tests/unit/test_file_artifact.py | 4 +- 8 files changed, 74 insertions(+), 31 deletions(-) diff --git a/simvue/api/objects/base.py b/simvue/api/objects/base.py index 65659bc5..cb178e44 100644 --- a/simvue/api/objects/base.py +++ b/simvue/api/objects/base.py @@ -184,6 +184,7 @@ def __init__( self._logger = logging.getLogger(f"simvue.{self.__class__.__name__}") self._label: str = getattr(self, "_label", self.__class__.__name__.lower()) self._read_only: bool = _read_only + self._is_set: bool = False self._endpoint: str = getattr(self, "_endpoint", f"{self._label}s") self._identifier: str | None = ( identifier if identifier is not None else f"offline_{uuid.uuid1()}" @@ -563,9 +564,15 @@ def _post(self, is_json: bool = True, **kwargs) -> dict[str, typing.Any]: if _id := _json_response.get("id"): self._logger.debug("'%s' created successfully", _id) self._identifier = _id - else: - _detail = _json_response.get("detail", "") - raise RuntimeError(f"Expected new ID for {self._label} but none found: {_detail}.") + elif not self._is_set: + _detail = _json_response.get("detail", _json_response) + + if not _detail: + _detail = "No information in JSON response." 
+ + raise RuntimeError( + f"Expected new ID for {self._label} but none found: {_detail}." + ) return _json_response diff --git a/simvue/api/objects/events.py b/simvue/api/objects/events.py index ff5b9e50..f786c99b 100644 --- a/simvue/api/objects/events.py +++ b/simvue/api/objects/events.py @@ -37,6 +37,7 @@ def __init__( self._label = "event" super().__init__(_read_only=_read_only, _local=_local, **kwargs) self._run_id = self._staging.get("run") + self._is_set = True @classmethod @pydantic.validate_call diff --git a/simvue/api/objects/metrics.py b/simvue/api/objects/metrics.py index 5e5a1988..fb9cea1e 100644 --- a/simvue/api/objects/metrics.py +++ b/simvue/api/objects/metrics.py @@ -39,6 +39,7 @@ def __init__( self._label = "metric" super().__init__(_read_only=_read_only, _local=_local, **kwargs) self._run_id = self._staging.get("run") + self._is_set = True @classmethod @pydantic.validate_call diff --git a/tests/functional/test_client.py b/tests/functional/test_client.py index 0abc9033..197bdcc8 100644 --- a/tests/functional/test_client.py +++ b/tests/functional/test_client.py @@ -148,6 +148,16 @@ def test_plot_metrics(create_test_run: tuple[sv_run.Run, dict]) -> None: pytest.skip("Plotting modules not found") client = svc.Client() + attempts: int = 0 + while ( + not list(client.get_metrics_names(create_test_run[1]["run_id"])) + and attempts < 10 + ): + time.sleep(1) + attempts += 1 + + if attempts >= 10: + raise AssertionError("Failed to retrieve metrics.") client.plot_metrics( run_ids=[create_test_run[1]["run_id"]], metric_names=list(create_test_run[1]["metrics"]), @@ -244,19 +254,34 @@ def test_get_runs( attributes: list[str] | None ) -> None: client = svc.Client() + attempts: int = 0 + + _result = None + while ( + _result is None + and attempts < 10 + ): + time.sleep(1) + try: + _result = client.get_runs( + filters=[], + output_format=output_format, + count_limit=10, + sort_by_columns=sorting, + timing_info=timing_info, + system_info=system_info, + 
metrics=metrics, + metadata=metadata, + alerts=alerts, + attributes=attributes + ) + except ObjectNotFoundError: + _result = None + attempts += 1 + + if attempts >= 10: + raise AssertionError("Failed to retrieve created runs.") - _result = client.get_runs( - filters=[], - output_format=output_format, - count_limit=10, - sort_by_columns=sorting, - timing_info=timing_info, - system_info=system_info, - metrics=metrics, - metadata=metadata, - alerts=alerts, - attributes=attributes - ) if output_format == "dataframe": assert not _result.empty diff --git a/tests/functional/test_executor.py b/tests/functional/test_executor.py index b0063764..0cf50b23 100644 --- a/tests/functional/test_executor.py +++ b/tests/functional/test_executor.py @@ -96,8 +96,8 @@ def callback(*_, evts=events, ident=i, **__): assert out_file.exists() assert triggers[i].is_set() for i in range(10): - os.remove(f"test_executor_multiprocess_cmd_{i}.err") - os.remove(f"test_executor_multiprocess_cmd_{i}.out") + os.remove(f"test_executor_multiprocess_cmd_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.err") + os.remove(f"test_executor_multiprocess_cmd_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out") with contextlib.suppress(ObjectNotFoundError): folder.delete(recursive=True, delete_runs=True) diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index 30a5b4b3..804f9bb6 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -1303,7 +1303,7 @@ def test_env_var_metadata() -> None: running=False, record_shell_vars={"SIMVUE_RUN_TEST_VAR_*"} ) - _recorded_meta = RunObject(identifier=run._id).metadata + _recorded_meta = RunObject(identifier=run.id).metadata assert all(key in _recorded_meta.get("shell") for key in _recorded_env) @pytest.mark.run diff --git a/tests/functional/test_run_execute_process.py b/tests/functional/test_run_execute_process.py index f1ca896c..86bc9d13 100644 --- a/tests/functional/test_run_execute_process.py +++ 
b/tests/functional/test_run_execute_process.py @@ -1,3 +1,4 @@ +import contextlib import pathlib import time import os @@ -5,7 +6,6 @@ import tempfile import pytest import filecmp -import simvue.sender as sv_send from simvue import Run, Client from simvue.sender import sender @@ -35,7 +35,7 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: for i in range(1, 3): _run.add_process(f"process_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}", executable="bash", script=temp_f.name) - assert _run.executor.get_command(f"process_{i}") == f"bash {temp_f.name}" + assert _run.executor.get_command(f"process_{i}_{os.environ.get("PYTEST_XDIST_WORKER", 0)}") == f"bash {temp_f.name}" time.sleep(3) @@ -44,7 +44,7 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: # Check that for when one of the processes has stopped _attempts: int = 0 - _first_out = next(pathlib.Path.cwd().glob("*process_*.out")) + _first_out = next(pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out")) while _first_out.stat().st_size == 0 and _attempts < 10: time.sleep(1) @@ -54,19 +54,20 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: raise AssertionError("Failed to terminate processes") # Check the Python process did not error - _out_err = pathlib.Path.cwd().glob("*process_*.err") + _out_err = pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.err") for file in _out_err: with file.open() as in_f: assert not in_f.readlines() # Now check the counter in the process was terminated # just beyond the sleep time - _out_files = pathlib.Path.cwd().glob("*process_*.out") + _out_files = pathlib.Path.cwd().glob(f"*process_*_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.out") for file in _out_files: with file.open() as in_f: assert (lines := in_f.readlines()) assert int(lines[0].strip()) < 4 - os.unlink(temp_f.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_f.name) 
def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: @@ -78,11 +79,16 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: """ run, _ = create_plain_run with tempfile.TemporaryDirectory() as temp_dir: - with tempfile.NamedTemporaryFile(delete=False, dir=temp_dir, suffix=".py") as temp_file: + with tempfile.NamedTemporaryFile( + delete=False, + dir=temp_dir, + prefix=os.environ.get("PYTEST_XDIST_WORKER", "0"), + suffix=".py" + ) as temp_file: with open(temp_file.name, "w") as out_f: out_f.writelines([ "import os\n", - "f = open('new_file.txt', 'w')\n", + f"f = open('new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt', 'w')\n", "f.write('Test Line')\n", "f.close()" ]) @@ -95,7 +101,7 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: cwd=temp_dir ) time.sleep(1) - run.save_file(os.path.join(temp_dir, "new_file.txt"), 'output') + run.save_file(os.path.join(temp_dir, f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt"), 'output') client = Client() @@ -104,8 +110,9 @@ def test_processes_cwd(create_plain_run: dict[Run, dict]) -> None: client.get_artifact_as_file(run_id, os.path.basename(temp_file.name), output_dir=os.path.join(temp_dir, "downloaded")) assert filecmp.cmp(os.path.join(temp_dir, "downloaded", os.path.basename(temp_file.name)), temp_file.name) - client.get_artifact_as_file(run_id, "new_file.txt", output_dir=os.path.join(temp_dir, "downloaded")) - with open(os.path.join(temp_dir, "downloaded", "new_file.txt"), "r") as new_file: + client.get_artifact_as_file(run_id, f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt", output_dir=os.path.join(temp_dir, "downloaded")) + with open(os.path.join(temp_dir, "downloaded", f"new_file_{os.environ.get("PYTEST_XDIST_WORKER", 0)}.txt"), "r") as new_file: assert new_file.read() == "Test Line" - os.unlink(temp_file.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_file.name) diff --git a/tests/unit/test_file_artifact.py 
b/tests/unit/test_file_artifact.py index aeb47eda..c442086c 100644 --- a/tests/unit/test_file_artifact.py +++ b/tests/unit/test_file_artifact.py @@ -1,3 +1,4 @@ +import contextlib import pytest import os import uuid @@ -46,7 +47,8 @@ def test_file_artifact_creation_online() -> None: assert _artifact.to_dict() _run.delete() _folder.delete(recursive=True, delete_runs=True, runs_only=False) - os.unlink(temp_f.name) + with contextlib.suppress(FileNotFoundError): + os.unlink(temp_f.name) if _failed: raise AssertionError("\n\t-" + "\n\t- ".join(": ".join(i) for i in _failed))