From 1c48a4bb9c206edd9ea115d88840e3d3916c71f1 Mon Sep 17 00:00:00 2001 From: erikhuck Date: Tue, 1 Apr 2025 16:54:13 -0400 Subject: [PATCH] Introduces the SubTracker --- src/gpu_tracker/__init__.py | 1 + src/gpu_tracker/_helper_classes.py | 17 +++++-- src/gpu_tracker/sub_tracker.py | 46 +++++++++++++++++++ src/gpu_tracker/tracker.py | 4 +- tests/data/None_None.csv | 11 +++++ tests/data/None_sub-tracking-file.csv.csv | 11 +++++ tests/data/None_sub-tracking-file.sqlite.csv | 11 +++++ tests/data/my-code-block_None.csv | 11 +++++ .../my-code-block_sub-tracking-file.csv.csv | 11 +++++ ...my-code-block_sub-tracking-file.sqlite.csv | 11 +++++ tests/test_sub_tracker.py | 41 +++++++++++++++++ tests/test_tracker.py | 13 +----- tests/utils.py | 22 +++++++++ 13 files changed, 193 insertions(+), 17 deletions(-) create mode 100644 src/gpu_tracker/sub_tracker.py create mode 100644 tests/data/None_None.csv create mode 100644 tests/data/None_sub-tracking-file.csv.csv create mode 100644 tests/data/None_sub-tracking-file.sqlite.csv create mode 100644 tests/data/my-code-block_None.csv create mode 100644 tests/data/my-code-block_sub-tracking-file.csv.csv create mode 100644 tests/data/my-code-block_sub-tracking-file.sqlite.csv create mode 100644 tests/test_sub_tracker.py diff --git a/src/gpu_tracker/__init__.py b/src/gpu_tracker/__init__.py index 15cc670..7e2b829 100644 --- a/src/gpu_tracker/__init__.py +++ b/src/gpu_tracker/__init__.py @@ -10,3 +10,4 @@ __version__ = _gv(_path.join(_path.dirname(__file__), _path.pardir)) from .tracker import Tracker +from .sub_tracker import SubTracker diff --git a/src/gpu_tracker/_helper_classes.py b/src/gpu_tracker/_helper_classes.py index 187d1f5..1879eb0 100644 --- a/src/gpu_tracker/_helper_classes.py +++ b/src/gpu_tracker/_helper_classes.py @@ -8,6 +8,7 @@ import dataclasses as dclass import sqlalchemy as sqlalc import sqlalchemy.orm as sqlorm +import enum class _GPUQuerier(abc.ABC): @@ -111,7 +112,7 @@ def ram_and_utilization(cls) -> 
pd.DataFrame: @dclass.dataclass -class TimepointUsage: +class _TimepointUsage: main_ram: float = 0.0 descendants_ram: float = 0.0 combined_ram: float = 0.0 @@ -136,6 +137,16 @@ class TimepointUsage: timestamp: float = 0.0 +@dclass.dataclass +class _SubTrackerLog: + class CodeBlockPosition(enum.Enum): + START = 'START' + END = 'END' + code_block_name: str + position: CodeBlockPosition + timestamp: float + + class _TrackingFile(abc.ABC): @staticmethod def create(file: str | None) -> _TrackingFile | None: @@ -153,7 +164,7 @@ def create(file: str | None) -> _TrackingFile | None: def __init__(self, file: str): self._file = file - def write_row(self, values: TimepointUsage): + def write_row(self, values: _TimepointUsage | _SubTrackerLog): values = dclass.asdict(values) if not os.path.isfile(self._file): self._create_file(values) @@ -206,5 +217,5 @@ def _create_file(self, values: dict): for column_name, data_type in schema.items(): sqlalchemy_type = type_mapping[data_type] columns.append(sqlalc.Column(column_name, sqlalchemy_type)) - tracking_table = sqlalc.Table(_SQLiteTrackingFile._SQLITE_TABLE_NAME, metadata, *columns) + sqlalc.Table(_SQLiteTrackingFile._SQLITE_TABLE_NAME, metadata, *columns) metadata.create_all(engine) diff --git a/src/gpu_tracker/sub_tracker.py b/src/gpu_tracker/sub_tracker.py new file mode 100644 index 0000000..a608f2f --- /dev/null +++ b/src/gpu_tracker/sub_tracker.py @@ -0,0 +1,46 @@ +"""The ``sub_tracker`` module contains the ``SubTracker`` class which can alternatively be imported directly from the ``gpu_tracker`` package.""" +import inspect +import os +import time +from ._helper_classes import _TrackingFile, _SubTrackerLog + + +class SubTracker: + """ + Context manager that logs to a file for the purposes of sub tracking a code block using the timestamps at which the codeblock begins and ends. + Entering the context manager marks the beginning of the code block and exiting the context manager marks the end of the code block. 
+ At the beginning and end of the codeblock, the ``SubTracker`` logs a row to a tabular file (".csv" or ".sqlite") that includes the timestamp along with a name for the code block and an indication of whether it is the start or end of the code block. + This resulting file can be used alongside a tracking file created by a ``Tracker`` object for more granular analysis of specific code blocks. + + :ivar str code_block_name: The name of the code block being sub-tracked. + :ivar str sub_tracking_file: The path to the file where the sub-tracking info is logged. + """ + def __init__(self, code_block_name: str | None = None, sub_tracking_file: str | None = None): + """ + :param code_block_name: The name of the code block within a ``Tracker`` context that is being sub-tracked. Defaults to the file path and line number where the SubTracker context is started. + :param sub_tracking_file: The path to the file to log the time stamps of the code block being sub-tracked. Defaults to the ID of the process where the SubTracker context is created and in CSV format. 
+ """ + if code_block_name is not None: + self.code_block_name = code_block_name + else: + stack = inspect.stack() + caller_frame = stack[1] + file_path = os.path.abspath(caller_frame.filename) + line_number = caller_frame.lineno + self.code_block_name = f'{file_path}:{line_number}' + if sub_tracking_file is None: + sub_tracking_file = f'{os.getpid()}.csv' + self.sub_tracking_file = sub_tracking_file + self._sub_tracking_file = _TrackingFile.create(self.sub_tracking_file) + + def _log(self, code_block_position: _SubTrackerLog.CodeBlockPosition): + sub_tracker_log = _SubTrackerLog( + code_block_name=self.code_block_name, position=code_block_position.value, timestamp=time.time()) + self._sub_tracking_file.write_row(sub_tracker_log) + + def __enter__(self): + self._log(_SubTrackerLog.CodeBlockPosition.START) + return self + + def __exit__(self, *_): + self._log(_SubTrackerLog.CodeBlockPosition.END) diff --git a/src/gpu_tracker/tracker.py b/src/gpu_tracker/tracker.py index 64e6bcb..2325100 100644 --- a/src/gpu_tracker/tracker.py +++ b/src/gpu_tracker/tracker.py @@ -13,7 +13,7 @@ import pickle as pkl import uuid import pandas as pd -from ._helper_classes import _NvidiaQuerier, _AMDQuerier, _TrackingFile, TimepointUsage +from ._helper_classes import _NvidiaQuerier, _AMDQuerier, _TrackingFile, _TimepointUsage class _TrackingProcess(mproc.Process): @@ -140,7 +140,7 @@ def run(self): self._stop_event.set() # Simulate a do-while loop so that the tracking is executed at least once. 
while True: - timepoint_usage = TimepointUsage() + timepoint_usage = _TimepointUsage() with open(self._resource_usage_file, 'wb') as file: pkl.dump(self._resource_usage, file) if self._stop_event.is_set(): diff --git a/tests/data/None_None.csv b/tests/data/None_None.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ b/tests/data/None_None.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/data/None_sub-tracking-file.csv.csv b/tests/data/None_sub-tracking-file.csv.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ b/tests/data/None_sub-tracking-file.csv.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/data/None_sub-tracking-file.sqlite.csv b/tests/data/None_sub-tracking-file.sqlite.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ b/tests/data/None_sub-tracking-file.sqlite.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/data/my-code-block_None.csv b/tests/data/my-code-block_None.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ b/tests/data/my-code-block_None.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/data/my-code-block_sub-tracking-file.csv.csv b/tests/data/my-code-block_sub-tracking-file.csv.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ b/tests/data/my-code-block_sub-tracking-file.csv.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/data/my-code-block_sub-tracking-file.sqlite.csv b/tests/data/my-code-block_sub-tracking-file.sqlite.csv new file mode 100644 index 0000000..536400f --- /dev/null +++ 
b/tests/data/my-code-block_sub-tracking-file.sqlite.csv @@ -0,0 +1,11 @@ +position,timestamp +START,0 +END,1 +START,2 +END,3 +START,4 +END,5 +START,6 +END,7 +START,8 +END,9 diff --git a/tests/test_sub_tracker.py b/tests/test_sub_tracker.py new file mode 100644 index 0000000..a94bce3 --- /dev/null +++ b/tests/test_sub_tracker.py @@ -0,0 +1,41 @@ +import pytest as pt +import gpu_tracker as gput +import utils + + +@pt.fixture(name='code_block_name', params=['my-code-block', None]) +def get_code_block_name(request) -> str | None: + yield request.param + + +@pt.fixture(name='sub_tracking_file', params=['sub-tracking-file.csv', 'sub-tracking-file.sqlite', None]) +def get_sub_tracking_file(request) -> str | None: + yield request.param + + +def test_sub_tracker(mocker, code_block_name: str | None, sub_tracking_file: str | None): + n_iterations = 5 + getpid_mock = mocker.patch('gpu_tracker.sub_tracker.os.getpid', side_effect=[1234] * n_iterations) + time_mock = mocker.patch( + 'gpu_tracker.sub_tracker.time', time=mocker.MagicMock(side_effect=range(n_iterations * 2))) + default_code_block_end = 'test_sub_tracker.py:23' + for _ in range(n_iterations): + with gput.SubTracker(code_block_name=code_block_name, sub_tracking_file=sub_tracking_file) as sub_tracker: + if code_block_name is None: + assert sub_tracker.code_block_name.endswith(default_code_block_end) + if sub_tracking_file is None: + assert sub_tracker.sub_tracking_file == '1234.csv' + if sub_tracking_file is None: + assert len(getpid_mock.call_args_list) == n_iterations + assert len(time_mock.time.call_args_list) == n_iterations * 2 + + def code_block_name_test(val: str): + if code_block_name is None: + assert val.endswith(default_code_block_end) + else: + assert val == code_block_name + expected_tracking_file = f'tests/data/{code_block_name}_{sub_tracking_file}.csv' + utils.test_tracking_file( + actual_tracking_file=sub_tracker.sub_tracking_file, expected_tracking_file=expected_tracking_file, + 
excluded_col='code_block_name', excluded_col_test=code_block_name_test + ) diff --git a/tests/test_tracker.py b/tests/test_tracker.py index 779b78f..6bd17b4 100644 --- a/tests/test_tracker.py +++ b/tests/test_tracker.py @@ -5,10 +5,6 @@ import pytest as pt import utils import subprocess as subp -import pandas as pd -import sqlalchemy as sqlalc -# noinspection PyProtectedMember -from gpu_tracker._helper_classes import _SQLiteTrackingFile gpu_unavailable_message = ('Neither the nvidia-smi command nor the amd-smi command is installed. Install one of these to profile the ' @@ -231,14 +227,7 @@ def start_mock(self): if tracking_file is None: assert tracker._tracking_process.tracking_file is None else: - if tracking_file.endswith('.csv'): - actual_timepoint_usage = pd.read_csv(tracking_file) - else: - engine = sqlalc.create_engine(f'sqlite:///{tracking_file}', poolclass=sqlalc.pool.NullPool) - actual_timepoint_usage = pd.read_sql_table(_SQLiteTrackingFile._SQLITE_TABLE_NAME, engine) - expected_timepoint_usage = pd.read_csv(f'{expected_measurements_file}.csv') - pd.testing.assert_frame_equal(expected_timepoint_usage, actual_timepoint_usage, atol=1e-10, rtol=1e-10) - os.remove(tracking_file) + utils.test_tracking_file(actual_tracking_file=tracking_file, expected_tracking_file=f'{expected_measurements_file}.csv') def test_cannot_connect_warnings(mocker, caplog): diff --git a/tests/utils.py b/tests/utils.py index cb9f7f9..32e3d25 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,3 +1,25 @@ +import pandas as pd +import sqlalchemy as sqlalc +import os +# noinspection PyProtectedMember +from gpu_tracker._helper_classes import _SQLiteTrackingFile + + def assert_args_list(mock, expected_args_list: list[tuple | dict], use_kwargs: bool = False): actual_args_list = [call.kwargs if use_kwargs else call.args for call in mock.call_args_list] assert actual_args_list == expected_args_list + + +def test_tracking_file( + actual_tracking_file: str, expected_tracking_file: str, 
excluded_col: str | None = None, excluded_col_test=None): + if actual_tracking_file.endswith('.csv'): + actual_tracking_log = pd.read_csv(actual_tracking_file) + else: + engine = sqlalc.create_engine(f'sqlite:///{actual_tracking_file}', poolclass=sqlalc.pool.NullPool) + actual_tracking_log = pd.read_sql_table(_SQLiteTrackingFile._SQLITE_TABLE_NAME, engine) + if excluded_col is not None: + actual_tracking_log[excluded_col].apply(excluded_col_test) + actual_tracking_log = actual_tracking_log[actual_tracking_log.columns.difference([excluded_col])] + expected_tracking_log = pd.read_csv(expected_tracking_file) + pd.testing.assert_frame_equal(expected_tracking_log, actual_tracking_log, atol=1e-10, rtol=1e-10) + os.remove(actual_tracking_file)