From 2ae5dd9ab4ed3ca993d993a072486597c2205834 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Tue, 2 Sep 2025 10:03:50 +0100 Subject: [PATCH 01/23] made the it_work test --- kloppy/tests/test_it_works.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 kloppy/tests/test_it_works.py diff --git a/kloppy/tests/test_it_works.py b/kloppy/tests/test_it_works.py new file mode 100644 index 000000000..17e0d3f23 --- /dev/null +++ b/kloppy/tests/test_it_works.py @@ -0,0 +1,5 @@ +def it_works(): + return "it works" + +def test_it_works(): + assert it_works() == "it works" \ No newline at end of file From 8529633f1d4a930ed6e30d7302c0e379971590dc Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 8 Sep 2025 21:34:38 +0200 Subject: [PATCH 02/23] feat(cdf): add Common Data Format tracking data serializer skeleton - Add CDFTrackingDataSerializer with placeholder implementation - Add failing test using common-data-format-validator - Add Provider.CDF enum value - Add common-data-format-validator to test requirements --- kloppy/_providers/cdf.py | 6 ++ kloppy/domain/models/common.py | 2 + .../serializers/tracking/cdf/serializer.py | 28 ++++++++ .../infra/serializers/tracking/serializer.py | 20 ++++++ kloppy/tests/test_cdf.py | 64 +++++++++++++++++++ setup.py | 1 + 6 files changed, 121 insertions(+) create mode 100644 kloppy/_providers/cdf.py create mode 100644 kloppy/infra/serializers/tracking/cdf/serializer.py create mode 100644 kloppy/infra/serializers/tracking/serializer.py create mode 100644 kloppy/tests/test_cdf.py diff --git a/kloppy/_providers/cdf.py b/kloppy/_providers/cdf.py new file mode 100644 index 000000000..309c8d5d6 --- /dev/null +++ b/kloppy/_providers/cdf.py @@ -0,0 +1,6 @@ +from typing import IO, NamedTuple + + +class CDFOutputs(NamedTuple): + meta_data: IO[bytes] + tracking_data: IO[bytes] diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index e0ab88ce1..8ba58191a 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -116,6 +116,7 @@ class Provider(Enum): DATAFACTORY (Provider): STATSPERFORM (Provider): SPORTVU (Provider): + CDF (Provider): OTHER (Provider): """ @@ -134,6 +135,7 @@ class Provider(Enum): HAWKEYE = "hawkeye" SPORTVU = "sportvu" SIGNALITY = "signality" + CDF = "common_data_format" OTHER = "other" def __str__(self): diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py new file mode 100644 index 000000000..ee5c804ae --- /dev/null +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -0,0 +1,28 @@ +from typing import IO, NamedTuple + +from kloppy.domain import Provider, TrackingDataset +from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer + + +class CDFOutputs(NamedTuple): + meta_data: IO[bytes] + tracking_data: IO[bytes] + + +class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): + provider = Provider.CDF + + def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: + """ + Serialize a TrackingDataset to Common Data Format. 
+ + Args: + dataset: The tracking dataset to serialize + outputs: CDFOutputs containing file handles for metadata and tracking data + + Returns: + bool: True if serialization was successful, False otherwise + """ + outputs.meta_data.write(b'{"TODO": "implement metadata generation"}') + outputs.tracking_data.write(b'{"TODO": "implement tracking data generation"}') + return True diff --git a/kloppy/infra/serializers/tracking/serializer.py b/kloppy/infra/serializers/tracking/serializer.py new file mode 100644 index 000000000..a100c37f2 --- /dev/null +++ b/kloppy/infra/serializers/tracking/serializer.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod +from typing import Generic, TypeVar + +from kloppy.domain import ( + Provider, + TrackingDataset +) + +T = TypeVar("T") + + +class TrackingDataSerializer(ABC, Generic[T]): + @property + @abstractmethod + def provider(self) -> Provider: + raise NotImplementedError + + @abstractmethod + def serialize(self, dataset: TrackingDataset, outputs: T) -> bool: + raise NotImplementedError diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py new file mode 100644 index 000000000..9f653db6e --- /dev/null +++ b/kloppy/tests/test_cdf.py @@ -0,0 +1,64 @@ +import tempfile +from pathlib import Path + +import pytest +import cdf + +from kloppy import sportec +from kloppy.domain import TrackingDataset +from kloppy.infra.serializers.tracking.cdf.serializer import CDFTrackingDataSerializer, CDFOutputs + + +class TestCDFSerializer: + + @pytest.fixture + def raw_data(self, base_dir) -> Path: + return base_dir / "files/sportec_positional.xml" + + @pytest.fixture + def meta_data(self, base_dir) -> Path: + return base_dir / "files/sportec_meta.xml" + + @pytest.fixture + def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: + """Load a small Sportec tracking data snippet for testing CDF serialization.""" + return sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + limit=None, + only_alive=False, + ) + + def test_produces_valid_cdf_output(self, dataset): + """Test that CDFTrackingDataSerializer produces valid CDF output.""" + serializer = CDFTrackingDataSerializer() + + # Create temporary files with .jsonl extension for CDF validation + with tempfile.NamedTemporaryFile(mode='w+b', suffix='.json', delete=False) as meta_file, \ + tempfile.NamedTemporaryFile(mode='w+b', suffix='.jsonl', delete=False) as tracking_file: + + outputs = CDFOutputs( + meta_data=meta_file, + tracking_data=tracking_file + ) + + # Serialize the small Sportec dataset to CDF format + success = serializer.serialize(dataset, outputs) + assert success is True + + # Close files to ensure data is written + meta_file.close() + tracking_file.close() + + # Validate using CDF validators + tracking_validator = cdf.TrackingSchemaValidator() + meta_validator = cdf.MetaSchemaValidator() + + # This throws errors on invalid data + tracking_validator.validate_schema(sample=tracking_file.name) + meta_validator.validate_schema(sample=meta_file.name) + + # Clean up temp files + Path(meta_file.name).unlink() + Path(tracking_file.name).unlink() diff --git a/setup.py b/setup.py index c8d7757cd..04cc02172 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ def setup_package(): "flask", "flask-cors", "pytest-httpserver", + "common-data-format-validator", ], "development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"], From 32768c0a77ac4878bb3e3e91a6891fcbdc41dc16 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 8 Sep 2025 21:36:28 +0200 
Subject: [PATCH 03/23] minor --- kloppy/_providers/cdf.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kloppy/_providers/cdf.py b/kloppy/_providers/cdf.py index 309c8d5d6..e69de29bb 100644 --- a/kloppy/_providers/cdf.py +++ b/kloppy/_providers/cdf.py @@ -1,6 +0,0 @@ -from typing import IO, NamedTuple - - -class CDFOutputs(NamedTuple): - meta_data: IO[bytes] - tracking_data: IO[bytes] From f9d7678217253247419d31564633dce24bf4cc42 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 9 Sep 2025 10:25:25 +0200 Subject: [PATCH 04/23] Add a successful test to show what metadata and tracking data there should be. --- .../serializers/tracking/cdf/serializer.py | 285 +++++++++++++++++- .../infra/serializers/tracking/serializer.py | 5 +- kloppy/tests/test_cdf.py | 36 ++- 3 files changed, 303 insertions(+), 23 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index ee5c804ae..e16badce5 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -15,14 +15,291 @@ class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: """ Serialize a TrackingDataset to Common Data Format. - + Args: dataset: The tracking dataset to serialize outputs: CDFOutputs containing file handles for metadata and tracking data - + Returns: bool: True if serialization was successful, False otherwise + + Note: + TODO: Open question: should the serializer make sure the data is in the right format, and + do a transformation if not in the right format? """ - outputs.meta_data.write(b'{"TODO": "implement metadata generation"}') - outputs.tracking_data.write(b'{"TODO": "implement tracking data generation"}') + + outputs.meta_data.write( + b"""{ + "competition": { + "id": "comp_123", + "name": "Dutch Eredivisie Under 20", + "format": "league_20", + "age_restriction": null, + "type": "mens" + }, + "season": { + "id": "season_2023", + "name": "2022/23" + }, + "match": { + "id": "match_456", + "kickoff_time": "2023-05-15T19:45:00Z", + "periods": [ + { + "period": "first_half", + "play_direction": "left_right", + "start_time": "2023-05-15T19:45:00Z", + "end_time": "2023-05-15T20:30:00Z", + "start_frame_id": 0, + "end_frame_id": 27000, + "left_team_id": "team_789", + "right_team_id": "team_101" + }, + { + "period": "second_half", + "play_direction": "right_left", + "start_time": "2023-05-15T20:45:00Z", + "end_time": "2023-05-15T21:30:00Z", + "start_frame_id": 27001, + "end_frame_id": 54000, + "left_team_id": "team_101", + "right_team_id": "team_789" + } + ], + "whistles": [ + { + "type": "first_half", + "sub_type": "start", + "time": "2023-05-15T19:45:00Z" + }, + { + "type": "first_half", + "sub_type": "end", + "time": "2023-05-15T20:30:00Z" + }, + { + "type": "second_half", + "sub_type": "start", + "time": "2023-05-15T20:45:00Z" + }, + { + "type": "second_half", + "sub_type": "end", + "time": "2023-05-15T21:30:00Z" + } + ], + "round": "38", + "scheduled_kickoff_time": "2023-05-15T19:45:00Z", + "local_kickoff_time": "2023-05-15T20:45:00+01:00", + "misc": { + "country": "Netherlands", + "city": "Breda", + "percipitation": 0.5, + "is_open_roof": true + } + }, + "teams": { + "home": { + "id": "team_789", + "players": [ + { + "id": "player_1", + "team_id": "team_789", + "jersey_number": 1, + "is_starter": true + }, + { + "id": "player_2", + "team_id": "team_789", + "jersey_number": 2, + "is_starter": 
true + }, + { + "id": "player_3", + "team_id": "team_789", + "jersey_number": 3, + "is_starter": true + }, + { + "id": "player_4", + "team_id": "team_789", + "jersey_number": 4, + "is_starter": true + }, + { + "id": "player_5", + "team_id": "team_789", + "jersey_number": 5, + "is_starter": true + }, + { + "id": "player_6", + "team_id": "team_789", + "jersey_number": 6, + "is_starter": true + }, + { + "id": "player_7", + "team_id": "team_789", + "jersey_number": 7, + "is_starter": true + }, + { + "id": "player_8", + "team_id": "team_789", + "jersey_number": 8, + "is_starter": true + }, + { + "id": "player_9", + "team_id": "team_789", + "jersey_number": 9, + "is_starter": true + }, + { + "id": "player_10", + "team_id": "team_789", + "jersey_number": 10, + "is_starter": true + }, + { + "id": "player_11", + "team_id": "team_789", + "jersey_number": 11, + "is_starter": true + }, + { + "id": "player_12", + "team_id": "team_789", + "jersey_number": 12, + "is_starter": false + } + ] + }, + "away": { + "id": "team_101", + "players": [ + { + "id": "player_101", + "team_id": "team_101", + "jersey_number": 1, + "is_starter": true + }, + { + "id": "player_102", + "team_id": "team_101", + "jersey_number": 2, + "is_starter": true + }, + { + "id": "player_103", + "team_id": "team_101", + "jersey_number": 3, + "is_starter": true + }, + { + "id": "player_104", + "team_id": "team_101", + "jersey_number": 4, + "is_starter": true + }, + { + "id": "player_105", + "team_id": "team_101", + "jersey_number": 5, + "is_starter": true + }, + { + "id": "player_106", + "team_id": "team_101", + "jersey_number": 6, + "is_starter": true + }, + { + "id": "player_107", + "team_id": "team_101", + "jersey_number": 7, + "is_starter": true + }, + { + "id": "player_108", + "team_id": "team_101", + "jersey_number": 8, + "is_starter": true + }, + { + "id": "player_109", + "team_id": "team_101", + "jersey_number": 9, + "is_starter": true + }, + { + "id": "player_110", + "team_id": "team_101", + "jersey_number": 10, + "is_starter": true + }, + { + "id": "player_111", + "team_id": "team_101", + "jersey_number": 11, + "is_starter": true + }, + { + "id": "player_112", + "team_id": "team_101", + "jersey_number": 12, + "is_starter": false + } + ] + } + }, + "stadium": { + "id": "stadium_202", + "pitch_length": 105.0, + "pitch_width": 68.0, + "name": "A Stadium", + "turf": "grass" + }, + "meta": { + "video": { + "perspective": "stadium", + "version": "0.1.0", + "name": "VideoVendor", + "fps": 25 + }, + "tracking": { + "version": "0.1.0", + "name": "TrackingVendor", + "fps": 25, + "collection_timing": "live" + }, + "landmarks": { + "version": "0.1.0", + "name": "LimbTrackingVendor", + "fps": 25, + "collection_timing": "post" + }, + "ball": { + "version": "0.1.0", + "name": "BallTrackingVendor", + "fps": 50, + "collection_timing": "live" + }, + "event": { + "collection_timing": "live" + }, + "meta": { + "version": "0.1.0", + "name": "Meta Vendor" + }, + "cdf": { + "version": "0.2.0" + } + } + }""" + ) + outputs.tracking_data.write( + b'{"frame_id":123456, "timestamp": "2023-10-01T12:00:00Z","period":"first_half","match":{"id":"match_12345"},"ball_status": true,"teams":{"home":{"id":"team_1","players":[{"id":"player_1","x":23.5,"y":45.2},{"id":"player_2","x":56.3,"y":12.8}]},"away":{"id":"team_2","players":[{"id":"player_3","x":78.1,"y":34.9},{"id":"player_4","x":45.6,"y":89}]}},"ball":{"x":50,"y":50,"z":0.5}}' + ) return True diff --git a/kloppy/infra/serializers/tracking/serializer.py b/kloppy/infra/serializers/tracking/serializer.py index 
a100c37f2..a7bc72e4c 100644 --- a/kloppy/infra/serializers/tracking/serializer.py +++ b/kloppy/infra/serializers/tracking/serializer.py @@ -1,10 +1,7 @@ from abc import ABC, abstractmethod from typing import Generic, TypeVar -from kloppy.domain import ( - Provider, - TrackingDataset -) +from kloppy.domain import Provider, TrackingDataset T = TypeVar("T") diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 9f653db6e..0845b7f68 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -6,11 +6,13 @@ from kloppy import sportec from kloppy.domain import TrackingDataset -from kloppy.infra.serializers.tracking.cdf.serializer import CDFTrackingDataSerializer, CDFOutputs +from kloppy.infra.serializers.tracking.cdf.serializer import ( + CDFTrackingDataSerializer, + CDFOutputs, +) class TestCDFSerializer: - @pytest.fixture def raw_data(self, base_dir) -> Path: return base_dir / "files/sportec_positional.xml" @@ -33,32 +35,36 @@ def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" serializer = CDFTrackingDataSerializer() - + # Create temporary files with .jsonl extension for CDF validation - with tempfile.NamedTemporaryFile(mode='w+b', suffix='.json', delete=False) as meta_file, \ - tempfile.NamedTemporaryFile(mode='w+b', suffix='.jsonl', delete=False) as tracking_file: - + with tempfile.NamedTemporaryFile( + mode="w+b", suffix=".json", delete=False + ) as meta_file, tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) as tracking_file: + outputs = CDFOutputs( - meta_data=meta_file, - tracking_data=tracking_file + meta_data=meta_file, tracking_data=tracking_file ) - + # Serialize the small Sportec dataset to CDF format success = serializer.serialize(dataset, outputs) assert success is True - + # Close files to ensure data is written meta_file.close() tracking_file.close() - + # Validate using CDF validators - tracking_validator = cdf.TrackingSchemaValidator() - meta_validator = cdf.MetaSchemaValidator() - # This throws errors on invalid data + # Validate meta data first. + tracking_validator = cdf.TrackingSchemaValidator() tracking_validator.validate_schema(sample=tracking_file.name) + + # Validate tracking data + meta_validator = cdf.MetaSchemaValidator() meta_validator.validate_schema(sample=meta_file.name) - + # Clean up temp files Path(meta_file.name).unlink() Path(tracking_file.name).unlink() From bb2e9764613aa23b6f9b1cadab5865283351dee5 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 9 Sep 2025 10:30:23 +0200 Subject: [PATCH 05/23] minor --- kloppy/tests/test_cdf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 0845b7f68..da40edc2d 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -58,13 +58,13 @@ def test_produces_valid_cdf_output(self, dataset): # Validate using CDF validators # Validate meta data first. 
- tracking_validator = cdf.TrackingSchemaValidator() - tracking_validator.validate_schema(sample=tracking_file.name) - - # Validate tracking data meta_validator = cdf.MetaSchemaValidator() meta_validator.validate_schema(sample=meta_file.name) + # Validate tracking data + tracking_validator = cdf.TrackingSchemaValidator() + tracking_validator.validate_schema(sample=tracking_file.name) + # Clean up temp files Path(meta_file.name).unlink() Path(tracking_file.name).unlink() From 914201614438c2a1ec9604f33034ead925ecde95 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 9 Sep 2025 12:03:37 +0200 Subject: [PATCH 06/23] fix: use patched CDF validator from koenvo fork Use bugfix/packaging branch which fixes schema file inclusion --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 04cc02172..7b74060c1 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def setup_package(): "flask", "flask-cors", "pytest-httpserver", - "common-data-format-validator", + "common-data-format-validator @ git+https://github.com/koenvo/common-data-format-validator.git@bugfix/packaging", ], "development": ["pre-commit==2.6.0"], "query": ["networkx>=2.4,<3"], From 2654c7e8fd196b054026b84cd1de5ec960b15f2b Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Wed, 17 Sep 2025 11:13:06 +0100 Subject: [PATCH 07/23] --- kloppy/domain/models/tracking.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kloppy/domain/models/tracking.py b/kloppy/domain/models/tracking.py index df60be76f..a34cb7405 100644 --- a/kloppy/domain/models/tracking.py +++ b/kloppy/domain/models/tracking.py @@ -83,6 +83,7 @@ def frame_rate(self): @deprecated( "to_pandas will be removed in the future. Please use to_df instead." ) + def to_pandas( self, record_converter: Optional[Callable[[Frame], Dict]] = None, @@ -118,6 +119,10 @@ def generic_record_converter(frame: Frame): return pd.DataFrame.from_records( map(generic_record_converter, self.records) ) - + + @property + def to_common_data_format(self)->[object]: + + return [] __all__ = ["Frame", "TrackingDataset", "PlayerData"] From 159bec300d02bcae3b4297206b17fb6d4c6d71d4 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Wed, 17 Sep 2025 18:47:11 +0100 Subject: [PATCH 08/23] definig the CDF serilizer body --- .../serializers/tracking/cdf/__init__.py | 0 .../serializers/tracking/cdf/serializer.py | 486 ++++++++---------- 2 files changed, 214 insertions(+), 272 deletions(-) create mode 100644 kloppy/infra/serializers/tracking/cdf/__init__.py diff --git a/kloppy/infra/serializers/tracking/cdf/__init__.py b/kloppy/infra/serializers/tracking/cdf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index e16badce5..311de5185 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -28,278 +28,220 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: do a transformation if not in the right format? 
""" - outputs.meta_data.write( - b"""{ - "competition": { - "id": "comp_123", - "name": "Dutch Eredivisie Under 20", - "format": "league_20", - "age_restriction": null, - "type": "mens" - }, - "season": { - "id": "season_2023", - "name": "2022/23" - }, - "match": { - "id": "match_456", - "kickoff_time": "2023-05-15T19:45:00Z", - "periods": [ - { - "period": "first_half", - "play_direction": "left_right", - "start_time": "2023-05-15T19:45:00Z", - "end_time": "2023-05-15T20:30:00Z", - "start_frame_id": 0, - "end_frame_id": 27000, - "left_team_id": "team_789", - "right_team_id": "team_101" - }, - { - "period": "second_half", - "play_direction": "right_left", - "start_time": "2023-05-15T20:45:00Z", - "end_time": "2023-05-15T21:30:00Z", - "start_frame_id": 27001, - "end_frame_id": 54000, - "left_team_id": "team_101", - "right_team_id": "team_789" - } - ], - "whistles": [ - { - "type": "first_half", - "sub_type": "start", - "time": "2023-05-15T19:45:00Z" - }, - { - "type": "first_half", - "sub_type": "end", - "time": "2023-05-15T20:30:00Z" - }, - { - "type": "second_half", - "sub_type": "start", - "time": "2023-05-15T20:45:00Z" - }, - { - "type": "second_half", - "sub_type": "end", - "time": "2023-05-15T21:30:00Z" - } - ], - "round": "38", - "scheduled_kickoff_time": "2023-05-15T19:45:00Z", - "local_kickoff_time": "2023-05-15T20:45:00+01:00", - "misc": { - "country": "Netherlands", - "city": "Breda", - "percipitation": 0.5, - "is_open_roof": true - } - }, - "teams": { - "home": { - "id": "team_789", - "players": [ - { - "id": "player_1", - "team_id": "team_789", - "jersey_number": 1, - "is_starter": true - }, - { - "id": "player_2", - "team_id": "team_789", - "jersey_number": 2, - "is_starter": true - }, - { - "id": "player_3", - "team_id": "team_789", - "jersey_number": 3, - "is_starter": true - }, - { - "id": "player_4", - "team_id": "team_789", - "jersey_number": 4, - "is_starter": true - }, - { - "id": "player_5", - "team_id": "team_789", - "jersey_number": 5, - "is_starter": true - }, - { - "id": "player_6", - "team_id": "team_789", - "jersey_number": 6, - "is_starter": true - }, - { - "id": "player_7", - "team_id": "team_789", - "jersey_number": 7, - "is_starter": true - }, - { - "id": "player_8", - "team_id": "team_789", - "jersey_number": 8, - "is_starter": true - }, - { - "id": "player_9", - "team_id": "team_789", - "jersey_number": 9, - "is_starter": true - }, - { - "id": "player_10", - "team_id": "team_789", - "jersey_number": 10, - "is_starter": true - }, - { - "id": "player_11", - "team_id": "team_789", - "jersey_number": 11, - "is_starter": true - }, - { - "id": "player_12", - "team_id": "team_789", - "jersey_number": 12, - "is_starter": false - } - ] - }, - "away": { - "id": "team_101", - "players": [ - { - "id": "player_101", - "team_id": "team_101", - "jersey_number": 1, - "is_starter": true - }, - { - "id": "player_102", - "team_id": "team_101", - "jersey_number": 2, - "is_starter": true - }, - { - "id": "player_103", - "team_id": "team_101", - "jersey_number": 3, - "is_starter": true - }, - { - "id": "player_104", - "team_id": "team_101", - "jersey_number": 4, - "is_starter": true - }, - { - "id": "player_105", - "team_id": "team_101", - "jersey_number": 5, - "is_starter": true - }, - { - "id": "player_106", - "team_id": "team_101", - "jersey_number": 6, - "is_starter": true - }, - { - "id": "player_107", - "team_id": "team_101", - "jersey_number": 7, - "is_starter": true - }, - { - "id": "player_108", - "team_id": "team_101", - "jersey_number": 8, - "is_starter": true - }, - 
{ - "id": "player_109", - "team_id": "team_101", - "jersey_number": 9, - "is_starter": true - }, - { - "id": "player_110", - "team_id": "team_101", - "jersey_number": 10, - "is_starter": true - }, - { - "id": "player_111", - "team_id": "team_101", - "jersey_number": 11, - "is_starter": true - }, - { - "id": "player_112", - "team_id": "team_101", - "jersey_number": 12, - "is_starter": false - } - ] - } - }, - "stadium": { - "id": "stadium_202", - "pitch_length": 105.0, - "pitch_width": 68.0, - "name": "A Stadium", - "turf": "grass" - }, - "meta": { - "video": { - "perspective": "stadium", - "version": "0.1.0", - "name": "VideoVendor", - "fps": 25 - }, - "tracking": { - "version": "0.1.0", - "name": "TrackingVendor", - "fps": 25, - "collection_timing": "live" - }, - "landmarks": { - "version": "0.1.0", - "name": "LimbTrackingVendor", - "fps": 25, - "collection_timing": "post" - }, - "ball": { - "version": "0.1.0", - "name": "BallTrackingVendor", - "fps": 50, - "collection_timing": "live" - }, - "event": { - "collection_timing": "live" - }, - "meta": { - "version": "0.1.0", - "name": "Meta Vendor" - }, - "cdf": { - "version": "0.2.0" - } - } - }""" + # Normalize the coordinate system + # creating the coordinate system according to the CDF paper specifications. + from kloppy.domain import ( + CustomCoordinateSystem, + Origin, + VerticalOrientation, + NormalizedPitchDimensions, + Dimension, + Orientation, + BallState + ) + # length and width of the pitch + length = dataset.metadata.pitch_dimensions.pitch_length + width = dataset.metadata.pitch_dimensions.pitch_length + CDF_coordinate_system = CustomCoordinateSystem( + origin=Origin.CENTER, + vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, + pitch_dimensions=NormalizedPitchDimensions( + x_dim=Dimension(min=-length / 2, max=length / 2), + y_dim=Dimension(min=-width / 2, max=width / 2), + pitch_length=length, + pitch_width=width, + ), + ) + # setting it as coordinate system of the imported data + dataset = dataset.transform( + to_coordinate_system=CDF_coordinate_system, + to_orientation=Orientation.STATIC_HOME_AWAY, ) - outputs.tracking_data.write( - b'{"frame_id":123456, "timestamp": "2023-10-01T12:00:00Z","period":"first_half","match":{"id":"match_12345"},"ball_status": true,"teams":{"home":{"id":"team_1","players":[{"id":"player_1","x":23.5,"y":45.2},{"id":"player_2","x":56.3,"y":12.8}]},"away":{"id":"team_2","players":[{"id":"player_3","x":78.1,"y":34.9},{"id":"player_4","x":45.6,"y":89}]}},"ball":{"x":50,"y":50,"z":0.5}}' + ##-------------------------------------------------------------------------- + + ## building Tracking jsonl + # Output containers + metadata_json = {} + + # Convert the dataset into a DataFrame + periods = { + 1: "first_half", + 2: "second_half", + 3: "first_half_extratime", + 4: "second_half_extratime", + 5: "shootout", + } + + # Get home and away team data + home_team, away_team = dataset.metadata.teams + # Get the players Id. 
+ home_player_ids, away_player_ids = ( + [player.player_id for player in home_team.players], + [player.player_id for player in away_team.players], ) + + for frame_id in range(len([1])): + frame_data = {} + + # Frame ID + frame_data["frame_id"] = frame_id + # Timestamp + frame_data["timestamp"] = dataset[frame_id].timestamp + # Period + frame_data["period"] = periods.get(dataset[frame_id].period, "unknown") + # Match ID (placeholder) + frame_data["match"] = {"id": dataset.metadata.game_id} + # Ball status + ball_state = dataset[frame_id].ball_state + frame_data["ball_status"] = dataset[0].ball_state == BallState.ALIVE + + # Teams and players + home_players = [] + for player, coordinates in dataset[frame_id].players_coordinates.items(): + if player.player_id in home_player_ids: + try: + x = coordinates.x + y = coordinates.x + home_players.append( + {"id": player.player_id, "x": round(x, 3), "y": round(y, 3)} + ) + except KeyError: + continue + + away_players = [] + for player, coordinates in dataset[frame_id].players_coordinates.items(): + if player.player_id in away_player_ids: + try: + x = coordinates.x + y = coordinates.x + home_players.append( + {"id": player.player_id, "x": round(x, 3), "y": round(y, 3)} + ) + except KeyError: + continue + + frame_data["teams"] = { + "home": {"id": home_team.team_id, "players": home_players}, + "away": {"id": away_team.team_id, "players": away_players}, + } + + # Ball + if frame_data["ball_status"] == True: + try: + ball_x = round(dataset[0].ball_coordinates.x, 3) + ball_y = round(dataset[0].ball_coordinates.y, 3) + ball_z = round(dataset[0].ball_coordinates.z, 3) + except KeyError: + ball_x = ball_y = ball_z = None + frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} + + # Add to tracking list + outputs.tracking_data.write(frame_data) + + ### build now the metadata. + + # Copetition infos. + metadata_json["competition"] = { # w don't have any of these informations + "id": "", + "name": "", + "format": "", + "age_restriction": "null", + "type": "", + } + + # season infos. + metadata_json["season"] = { # w don't have any of these informations + "id": "", + "name": "", + } + + # match infos. 
+ periods_info = [] + for period in dataset.metadata.periods: + curent_period = { + "period": periods[period.id], + "play_direction": "left_to_rigth", + "start_time": dataset.metadata.date + period.start_time.timestamp, + "end_time": dataset.metadata.date + period.end_time.timestamp, + "start_frame_id": ( + 0 + if period.id == 1 + else len(dataset.filter(lambda frame: frame.period.id == 1).to_df()) + ), + "end_frame_id": ( + len(dataset.filter(lambda frame: frame.period.id == period.id).to_df()) + - 1 + if period.id == 1 + else len(dataset.filter(lambda frame: frame.period.id == 1).to_df()) + + len( + dataset.filter(lambda frame: frame.period.id == period.id).to_df() + ) + - 1 + ), + "left_team_id": home_team.team_id, + "right_team_id": away_team.team_id, + } + periods_info.append(curent_period) + + ## building team_players for metadata + meta_home_players = [] + starters_ids = [] + for player, coordinates in dataset[0].players_coordinates.items(): + starters_ids.append(player.player_id) + + for player in home_team.players: + try: + home_players.append( + { + "id": player.player_id, + "team_id": home_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + meta_away_players = [] + for player in away_team.players: + try: + away_players.append( + { + "id": player.player_id, + "team_id": away_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + metadata_json["match"] = { + "match": { + "id": dataset.metadata.game_id, # same as for the jsonl + "kickoff_time": dataset.metadata.periods[0].start_time, + "periods": periods_info, + "round": "38", + "scheduled_kickoff_time": "", # how to get this ? + "local_kickoff_time": "", # how to get this ? + "misc": { + "country": "", # how to get this ? + "city": "", # how to get this ? + "percipitation": 0.5, # how to get this ? + "is_open_roof": True, # how to get this ? 
+ }, + }, + "teams": { + "home": { + "id": home_team.team_id, # same as for the jsonl + "players": meta_home_players, + }, + "away": { + "id": away_team.team_id, # same as for the jsonl + "players": meta_away_players, + }, + }, + } + + outputs.meta_data.write(metadata_json) return True From 55c23cdf9d95f19466fe0cd5539dc668c155a1b7 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Wed, 17 Sep 2025 22:43:03 +0100 Subject: [PATCH 09/23] try the test --- kloppy/tests/test_cdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index da40edc2d..a88545aff 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -2,7 +2,7 @@ from pathlib import Path import pytest -import cdf +from kloppy.infra.serializers.tracking import cdf from kloppy import sportec from kloppy.domain import TrackingDataset From 50c612a903b690096e7bfbeb5ef462edbca38e7c Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Mon, 22 Sep 2025 04:55:50 +0100 Subject: [PATCH 10/23] minor --- kloppy/infra/serializers/tracking/cdf/serializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 311de5185..e5e77b528 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -227,7 +227,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "misc": { "country": "", # how to get this ? "city": "", # how to get this ? - "percipitation": 0.5, # how to get this ? + "percipitation": 0, # how to get this ? "is_open_roof": True, # how to get this ? }, }, From bf71e4e0d92e887e2078f9e531b18dc8cf1d82b7 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Mon, 22 Sep 2025 10:17:38 +0100 Subject: [PATCH 11/23] fix the bug related to Time and datime.datime type. Also fixed the test path to the validators --- .../serializers/tracking/cdf/serializer.py | 124 ++++++++++++++---- kloppy/tests/test_cdf.py | 7 +- 2 files changed, 102 insertions(+), 29 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index e5e77b528..e7583c8e7 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -1,6 +1,8 @@ +import json +from datetime import timedelta from typing import IO, NamedTuple -from kloppy.domain import Provider, TrackingDataset +from kloppy.domain import Provider, TrackingDataset, Time from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer @@ -12,6 +14,22 @@ class CDFOutputs(NamedTuple): class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): provider = Provider.CDF + @staticmethod + def default_serializer(obj): + "handle timedelta and Time type during serialization." + if isinstance(obj, timedelta): + return obj.total_seconds() + if isinstance(obj, Time): + return { + "period_id": obj.period.id, + "start": obj.period.start_timestamp.total_seconds(), + "end": obj.period.end_timestamp.total_seconds(), + "timestamp": obj.timestamp.total_seconds(), + } + raise TypeError(f"Type {type(obj)} not serializable") + + + def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: """ Serialize a TrackingDataset to Common Data Format. 
@@ -25,7 +43,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: Note: TODO: Open question: should the serializer make sure the data is in the right format, and - do a transformation if not in the right format? + do a transformation if not in the right format? yes normally. """ # Normalize the coordinate system @@ -37,8 +55,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: NormalizedPitchDimensions, Dimension, Orientation, - BallState + BallState, ) + # length and width of the pitch length = dataset.metadata.pitch_dimensions.pitch_length width = dataset.metadata.pitch_dimensions.pitch_length @@ -62,8 +81,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ## building Tracking jsonl # Output containers metadata_json = {} + # tracking_datas = [] use if we want to manage all the frames - # Convert the dataset into a DataFrame + # list of different periods within a game periods = { 1: "first_half", 2: "second_half", @@ -80,42 +100,61 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: [player.player_id for player in away_team.players], ) - for frame_id in range(len([1])): + for frame_id in range( + len([1]) + ): # change this when we would like to manage all the frames frame_data = {} # Frame ID frame_data["frame_id"] = frame_id # Timestamp - frame_data["timestamp"] = dataset[frame_id].timestamp + frame_data["timestamp"] = dataset[ + frame_id + ].timestamp.total_seconds() # Period - frame_data["period"] = periods.get(dataset[frame_id].period, "unknown") + frame_data["period"] = periods.get( + dataset[frame_id].period, "unknown" + ) # Match ID (placeholder) frame_data["match"] = {"id": dataset.metadata.game_id} # Ball status - ball_state = dataset[frame_id].ball_state - frame_data["ball_status"] = dataset[0].ball_state == BallState.ALIVE + frame_data["ball_status"] = ( + dataset[0].ball_state == BallState.ALIVE + ) # Teams and players home_players = [] - for player, coordinates in dataset[frame_id].players_coordinates.items(): + for player, coordinates in dataset[ + frame_id + ].players_coordinates.items(): if player.player_id in home_player_ids: try: x = coordinates.x y = coordinates.x home_players.append( - {"id": player.player_id, "x": round(x, 3), "y": round(y, 3)} + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + } ) except KeyError: continue away_players = [] - for player, coordinates in dataset[frame_id].players_coordinates.items(): + for player, coordinates in dataset[ + frame_id + ].players_coordinates.items(): if player.player_id in away_player_ids: try: x = coordinates.x y = coordinates.x - home_players.append( - {"id": player.player_id, "x": round(x, 3), "y": round(y, 3)} + away_players.append( + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + } ) except KeyError: continue @@ -135,13 +174,21 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ball_x = ball_y = ball_z = None frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} - # Add to tracking list - outputs.tracking_data.write(frame_data) + # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs + + # Add to tracking list + outputs.tracking_data.write( + ( + json.dumps(frame_data, default=self.default_serializer) + "\n" + ).encode("utf-8") + ) ### build now the metadata. - # Copetition infos. 
- metadata_json["competition"] = { # w don't have any of these informations + # Competition infos. + metadata_json[ + "competition" + ] = { # we don't have any of these informations "id": "", "name": "", "format": "", @@ -150,7 +197,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: } # season infos. - metadata_json["season"] = { # w don't have any of these informations + metadata_json["season"] = { # we don't have any of these informations "id": "", "name": "", } @@ -161,20 +208,38 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: curent_period = { "period": periods[period.id], "play_direction": "left_to_rigth", - "start_time": dataset.metadata.date + period.start_time.timestamp, - "end_time": dataset.metadata.date + period.end_time.timestamp, + "start_time": ( + dataset.metadata.date + period.start_time.timestamp + ).timestamp(), + "end_time": ( + dataset.metadata.date + period.end_time.timestamp + ).timestamp(), "start_frame_id": ( 0 if period.id == 1 - else len(dataset.filter(lambda frame: frame.period.id == 1).to_df()) + else len( + dataset.filter( + lambda frame: frame.period.id == 1 + ).to_df() + ) ), "end_frame_id": ( - len(dataset.filter(lambda frame: frame.period.id == period.id).to_df()) + len( + dataset.filter( + lambda frame: frame.period.id == period.id + ).to_df() + ) - 1 if period.id == 1 - else len(dataset.filter(lambda frame: frame.period.id == 1).to_df()) + else len( + dataset.filter( + lambda frame: frame.period.id == 1 + ).to_df() + ) + len( - dataset.filter(lambda frame: frame.period.id == period.id).to_df() + dataset.filter( + lambda frame: frame.period.id == period.id + ).to_df() ) - 1 ), @@ -243,5 +308,12 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: }, } - outputs.meta_data.write(metadata_json) + outputs.meta_data.write( + ( + json.dumps(metadata_json, default=self.default_serializer) + + "\n" + ).encode("utf-8") + ) + + return True diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index a88545aff..4f0c04b1e 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -2,7 +2,8 @@ from pathlib import Path import pytest -from kloppy.infra.serializers.tracking import cdf +#from kloppy.infra.serializers.tracking import cdf +import cdf from kloppy import sportec from kloppy.domain import TrackingDataset @@ -58,11 +59,11 @@ def test_produces_valid_cdf_output(self, dataset): # Validate using CDF validators # Validate meta data first. 
- meta_validator = cdf.MetaSchemaValidator() + meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") meta_validator.validate_schema(sample=meta_file.name) # Validate tracking data - tracking_validator = cdf.TrackingSchemaValidator() + tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/schema/tracking_v0.2.0.json") tracking_validator.validate_schema(sample=tracking_file.name) # Clean up temp files From 1ef12167905914f6ae2d6fa7a973a6fa062bd81f Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Tue, 23 Sep 2025 09:07:43 +0100 Subject: [PATCH 12/23] minor --- .../serializers/tracking/cdf/serializer.py | 82 +++++++++++++------ kloppy/tests/test_cdf.py | 30 +++++-- 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index e7583c8e7..c19ddb016 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -18,7 +18,7 @@ class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): def default_serializer(obj): "handle timedelta and Time type during serialization." if isinstance(obj, timedelta): - return obj.total_seconds() + return obj.total_seconds() if isinstance(obj, Time): return { "period_id": obj.period.id, @@ -28,8 +28,6 @@ def default_serializer(obj): } raise TypeError(f"Type {type(obj)} not serializable") - - def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: """ Serialize a TrackingDataset to Common Data Format. @@ -113,10 +111,10 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ].timestamp.total_seconds() # Period frame_data["period"] = periods.get( - dataset[frame_id].period, "unknown" + dataset[frame_id].period.id, "unknown" ) # Match ID (placeholder) - frame_data["match"] = {"id": dataset.metadata.game_id} + frame_data["match"] = {"id": str(dataset.metadata.game_id)} # Ball status frame_data["ball_status"] = ( dataset[0].ball_state == BallState.ALIVE @@ -160,19 +158,44 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: continue frame_data["teams"] = { - "home": {"id": home_team.team_id, "players": home_players}, - "away": {"id": away_team.team_id, "players": away_players}, + "home": { + "id": home_team.team_id, + "players": home_players, + "jersey_color": "Null", + "name": home_team.name, + "formation": ( + "Null" + if str(home_team.starting_formation) == None + else str(home_team.starting_formation) + ), + }, + "away": { + "id": away_team.team_id, + "players": away_players, + "jersey_color": "Null", + "name": away_team.name, + "formation": ( + "Null" + if str(away_team.starting_formation) == None + else str(away_team.starting_formation) + ), + }, } # Ball if frame_data["ball_status"] == True: try: - ball_x = round(dataset[0].ball_coordinates.x, 3) - ball_y = round(dataset[0].ball_coordinates.y, 3) - ball_z = round(dataset[0].ball_coordinates.z, 3) + ball_x = round(dataset[frame_id].ball_coordinates.x, 3) + ball_y = round(dataset[frame_id].ball_coordinates.y, 3) + ball_z = round(dataset[frame_id].ball_coordinates.z, 3) except KeyError: ball_x = ball_y = ball_z = None - frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} + else: + ball_x = ball_y = ball_z = ( + dataset.metadata.pitch_dimensions.pitch_length + 10 + ) + + frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} # normally here when we will use all the frames we are suppose to add them 
successivelly to a list that we will then write as tracking data outputs @@ -186,15 +209,15 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ### build now the metadata. # Competition infos. - metadata_json[ - "competition" - ] = { # we don't have any of these informations - "id": "", - "name": "", - "format": "", - "age_restriction": "null", - "type": "", - } + metadata_json["competition"] = ( + { # we don't have any of these informations + "id": "", + "name": "", + "format": "", + "age_restriction": "null", + "type": "", + } + ) # season infos. metadata_json["season"] = { # we don't have any of these informations @@ -256,7 +279,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: for player in home_team.players: try: - home_players.append( + meta_home_players.append( { "id": player.player_id, "team_id": home_team.team_id, @@ -270,7 +293,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: meta_away_players = [] for player in away_team.players: try: - away_players.append( + meta_away_players.append( { "id": player.player_id, "team_id": away_team.team_id, @@ -300,10 +323,24 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "home": { "id": home_team.team_id, # same as for the jsonl "players": meta_home_players, + "jersey_color": "null", + "name": home_team.name, + "formation": ( + "null" + if str(home_team.starting_formation) == None + else str(home_team.starting_formation) + ), }, "away": { "id": away_team.team_id, # same as for the jsonl "players": meta_away_players, + "jersey_color": "null", + "name": away_team.name, + "formation": ( + "null" + if str(away_team.starting_formation) == None + else str(home_team.starting_formation) + ), }, }, } @@ -315,5 +352,4 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ).encode("utf-8") ) - return True diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 4f0c04b1e..130b8ff63 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -2,7 +2,6 @@ from pathlib import Path import pytest -#from kloppy.infra.serializers.tracking import cdf import cdf from kloppy import sportec @@ -12,7 +11,6 @@ CDFOutputs, ) - class TestCDFSerializer: @pytest.fixture def raw_data(self, base_dir) -> Path: @@ -32,6 +30,25 @@ def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: limit=None, only_alive=False, ) + + # from kloppy import pff + + # # Path to data + # roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" + # metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" + # raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" + + # # Loading + # dataset = pff.load_tracking( + # raw_data=raw_data_path, + # meta_data=metadata_path, + # roster_meta_data=roster_path, + # coordinates="pff", + # limit=10, # only ten frames even if we are just gona use one of them. + # sample_rate=None, + # ) + + # return dataset def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" @@ -58,14 +75,15 @@ def test_produces_valid_cdf_output(self, dataset): # Validate using CDF validators - # Validate meta data first. 
- meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") - meta_validator.validate_schema(sample=meta_file.name) - # Validate tracking data tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/schema/tracking_v0.2.0.json") tracking_validator.validate_schema(sample=tracking_file.name) + # Validate meta data first. + meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") + meta_validator.validate_schema(sample=meta_file.name) + + # Clean up temp files Path(meta_file.name).unlink() Path(tracking_file.name).unlink() From 1c8a6eb936089421bdcbf53e4180c1dc1fb0e5b2 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Fri, 26 Sep 2025 11:43:35 +0100 Subject: [PATCH 13/23] pass test successfully. --- .../serializers/tracking/cdf/serializer.py | 203 +++++++++++------- kloppy/tests/test_cdf.py | 48 ++--- 2 files changed, 150 insertions(+), 101 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index c19ddb016..2b313709a 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -2,7 +2,7 @@ from datetime import timedelta from typing import IO, NamedTuple -from kloppy.domain import Provider, TrackingDataset, Time +from kloppy.domain import Provider, TrackingDataset, Time, PositionType from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer @@ -14,19 +14,42 @@ class CDFOutputs(NamedTuple): class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): provider = Provider.CDF + # to infer the starting formation if not given @staticmethod - def default_serializer(obj): - "handle timedelta and Time type during serialization." 
- if isinstance(obj, timedelta): - return obj.total_seconds() - if isinstance(obj, Time): - return { - "period_id": obj.period.id, - "start": obj.period.start_timestamp.total_seconds(), - "end": obj.period.end_timestamp.total_seconds(), - "timestamp": obj.timestamp.total_seconds(), - } - raise TypeError(f"Type {type(obj)} not serializable") + def get_starting_formation(list_players, team) -> str: + formation = "" + defender = midfiler = attacker = 0 + + for player in list_players: + if ( + team.get_player_by_id(player["id"]).starting_position.parent + == None + ): + continue + elif ( + team.get_player_by_id(player["id"]).starting_position.parent + == PositionType.Attacker + ): + attacker += 1 + elif ( + team.get_player_by_id(player["id"]).starting_position.parent + == PositionType.Midfielder + or team.get_player_by_id( + player["id"] + ).starting_position.parent.parent + == PositionType.Midfielder + ): + midfiler += 1 + elif ( + team.get_player_by_id( + player["id"] + ).starting_position.parent.parent + == PositionType.Defender + ): + defender += 1 + if defender + midfiler + attacker == 10: + formation = f"{defender}_{midfiler}_{attacker}" + return formation def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: """ @@ -56,9 +79,10 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: BallState, ) - # length and width of the pitch + # length and width of the pitch imported pitch length = dataset.metadata.pitch_dimensions.pitch_length width = dataset.metadata.pitch_dimensions.pitch_length + # build the cdf normalize coordinate system CDF_coordinate_system = CustomCoordinateSystem( origin=Origin.CENTER, vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, @@ -81,7 +105,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: metadata_json = {} # tracking_datas = [] use if we want to manage all the frames - # list of different periods within a game + # list of different periods within a game define by the cdf periods = { 1: "first_half", 2: "second_half", @@ -92,6 +116,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # Get home and away team data home_team, away_team = dataset.metadata.teams + # Get the players Id. home_player_ids, away_player_ids = ( [player.player_id for player in home_team.players], @@ -106,14 +131,14 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # Frame ID frame_data["frame_id"] = frame_id # Timestamp - frame_data["timestamp"] = dataset[ - frame_id - ].timestamp.total_seconds() + frame_data["timestamp"] = str( + dataset.metadata.date + dataset[frame_id].timestamp + ) # Period frame_data["period"] = periods.get( - dataset[frame_id].period.id, "unknown" + dataset[frame_id].period.id, "unknownn" ) - # Match ID (placeholder) + # Match ID frame_data["match"] = {"id": str(dataset.metadata.game_id)} # Ball status frame_data["ball_status"] = ( @@ -134,6 +159,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "id": player.player_id, "x": round(x, 3), "y": round(y, 3), + "position": player.starting_position.code, } ) except KeyError: @@ -157,27 +183,35 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: except KeyError: continue + # asumption + default_formation = "4-3-3" + + # teams within the tracking data. 
frame_data["teams"] = { "home": { "id": home_team.team_id, "players": home_players, - "jersey_color": "Null", + "jersey_color": " ", # "name": home_team.name, "formation": ( - "Null" - if str(home_team.starting_formation) == None - else str(home_team.starting_formation) + home_team.formations.at_start() + if home_team.formations.items + else self.get_starting_formation( + home_players, home_team + ) ), }, "away": { "id": away_team.team_id, "players": away_players, - "jersey_color": "Null", + "jersey_color": " ", "name": away_team.name, "formation": ( - "Null" - if str(away_team.starting_formation) == None - else str(away_team.starting_formation) + away_team.formations.at_start() + if home_team.formations.items + else self.get_starting_formation( + away_players, away_team + ) ), }, } @@ -197,24 +231,22 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} - # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs - + # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs + # but with only one frame we just dumpit in a json buffured. # Add to tracking list outputs.tracking_data.write( - ( - json.dumps(frame_data, default=self.default_serializer) + "\n" - ).encode("utf-8") + (json.dumps(frame_data) + "\n").encode("utf-8") ) + ################################################ ### build now the metadata. - # Competition infos. metadata_json["competition"] = ( { # we don't have any of these informations "id": "", "name": "", "format": "", - "age_restriction": "null", + "age_restriction": "16", "type": "", } ) @@ -230,7 +262,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: for period in dataset.metadata.periods: curent_period = { "period": periods[period.id], - "play_direction": "left_to_rigth", + "play_direction": "left_right", "start_time": ( dataset.metadata.date + period.start_time.timestamp ).timestamp(), @@ -271,7 +303,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: } periods_info.append(curent_period) - ## building team_players for metadata + ## building team_players for metadata meta_home_players = [] starters_ids = [] for player, coordinates in dataset[0].players_coordinates.items(): @@ -304,52 +336,69 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: except KeyError: continue + whistles_types = ["first_half", "second_half "] + metadata_json["match"] = { - "match": { - "id": dataset.metadata.game_id, # same as for the jsonl - "kickoff_time": dataset.metadata.periods[0].start_time, - "periods": periods_info, - "round": "38", - "scheduled_kickoff_time": "", # how to get this ? - "local_kickoff_time": "", # how to get this ? - "misc": { - "country": "", # how to get this ? - "city": "", # how to get this ? - "percipitation": 0, # how to get this ? - "is_open_roof": True, # how to get this ? - }, + "id": str(dataset.metadata.game_id), # same as for the jsonl + "kickoff_time": str(dataset.metadata.periods[0].start_time), + "periods": periods_info, + "whistles": [ + { + "type": whistles_types[0], + "sub_type": "start", + "time": str(dataset.metadata.periods[0].start_time), + } + ], # fake just to pass the test, I have to change this after. + "round": "38", + "scheduled_kickoff_time": str( + dataset.metadata.date + ), # how to get this ? 
+ "local_kickoff_time": "", # how to get this ? + "misc": { + "country": "", # how to get this ? + "city": "", # how to get this ? + "percipitation": 0, # how to get this ? + "is_open_roof": True, # how to get this ? }, - "teams": { - "home": { - "id": home_team.team_id, # same as for the jsonl - "players": meta_home_players, - "jersey_color": "null", - "name": home_team.name, - "formation": ( - "null" - if str(home_team.starting_formation) == None - else str(home_team.starting_formation) - ), - }, - "away": { - "id": away_team.team_id, # same as for the jsonl - "players": meta_away_players, - "jersey_color": "null", - "name": away_team.name, - "formation": ( - "null" - if str(away_team.starting_formation) == None - else str(home_team.starting_formation) - ), - }, + } + + metadata_json["teams"] = { + "home": { + "id": home_team.team_id, # same as for the jsonl + "players": meta_home_players, + "jersey_color": " ", + "name": home_team.name, + "formation": home_team.starting_formation + or self.get_starting_formation(meta_home_players, home_team), }, + "away": { + "id": away_team.team_id, # same as for the jsonl + "players": meta_away_players, + "jersey_color": " ", + "name": away_team.name, + "formation": away_team.starting_formation + or self.get_starting_formation(meta_away_players, away_team), + }, + } + + metadata_json["stadium"] = { + "id": "", + "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, + "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, + "name": "", + "turf": "", + } + + metadata_json["meta"] = { + "video": None, + "tracking": None, + "limb": None, + "meta": None, + "cdf": None, } outputs.meta_data.write( - ( - json.dumps(metadata_json, default=self.default_serializer) - + "\n" - ).encode("utf-8") + (json.dumps(metadata_json) + "\n").encode("utf-8") ) return True diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 130b8ff63..bcb3eb13e 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -23,32 +23,32 @@ def meta_data(self, base_dir) -> Path: @pytest.fixture def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" - return sportec.load_tracking( - raw_data=raw_data, - meta_data=meta_data, - coordinates="sportec", - limit=None, - only_alive=False, - ) - - # from kloppy import pff - - # # Path to data - # roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" - # metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" - # raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" - - # # Loading - # dataset = pff.load_tracking( - # raw_data=raw_data_path, - # meta_data=metadata_path, - # roster_meta_data=roster_path, - # coordinates="pff", - # limit=10, # only ten frames even if we are just gona use one of them. 
- # sample_rate=None, + # return sportec.load_tracking( + # raw_data=raw_data, + # meta_data=meta_data, + # coordinates="sportec", + # limit=None, + # only_alive=False, # ) + + from kloppy import pff + + # Path to data + roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" + metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" + raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" + + # Loading + dataset = pff.load_tracking( + raw_data=raw_data_path, + meta_data=metadata_path, + roster_meta_data=roster_path, + coordinates="pff", + limit=10, # only ten frames even if we are just gona use one of them. + sample_rate=None, + ) - # return dataset + return dataset def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" From 13bd23f70817d5d5db2596cdd00d2dbf92d944fa Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Fri, 26 Sep 2025 12:33:09 +0100 Subject: [PATCH 14/23] handle whistles --- .../serializers/tracking/cdf/serializer.py | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 2b313709a..770667b67 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -263,12 +263,12 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: curent_period = { "period": periods[period.id], "play_direction": "left_right", - "start_time": ( + "start_time": str( dataset.metadata.date + period.start_time.timestamp - ).timestamp(), - "end_time": ( + ), + "end_time": str( dataset.metadata.date + period.end_time.timestamp - ).timestamp(), + ), "start_frame_id": ( 0 if period.id == 1 @@ -336,23 +336,28 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: except KeyError: continue - whistles_types = ["first_half", "second_half "] + # get whistles related to period directly from them. + whistles = [] + for period in periods_info: + whistle_start = whistle_end = {} + # type + whistle_start["type"] = whistle_end["type"] = period["period"] + # sub_type + whistle_start["sub_type"] = "start" + whistle_end["sub_type"] = "end" + # time + whistle_start["time"] = period["start_time"] + whistle_end["time"] = period["end_time"] + whistles.append(whistle_start) + whistles.append(whistle_end) metadata_json["match"] = { "id": str(dataset.metadata.game_id), # same as for the jsonl "kickoff_time": str(dataset.metadata.periods[0].start_time), "periods": periods_info, - "whistles": [ - { - "type": whistles_types[0], - "sub_type": "start", - "time": str(dataset.metadata.periods[0].start_time), - } - ], # fake just to pass the test, I have to change this after. - "round": "38", - "scheduled_kickoff_time": str( - dataset.metadata.date - ), # how to get this ? + "whistles": whistles, # fake just to pass the test, I have to change this after. + "round": "", + "scheduled_kickoff_time": str(dataset.metadata.date), "local_kickoff_time": "", # how to get this ? "misc": { "country": "", # how to get this ? From a2ee57f6e3beaa12f093d25dba03fd7380e3d778 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Mon, 29 Sep 2025 11:31:20 +0100 Subject: [PATCH 15/23] start working on the serializer for the event data. 
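
The module mirrors the tracking-data serializer: CDFEventOutputs bundles the
output file handle for the event data, and serialize() is meant to write
CDF-formatted events into it. Note that in this first cut the class body is
still a copy of the tracking-data implementation, so only the interface is
new. A minimal sketch of the intended call pattern (the event dataset, the
loader used to obtain it, and the .jsonl output suffix are assumptions, not
part of this patch):

    import tempfile

    from kloppy.infra.serializers.event.cdf.serializer import (
        CDFEventOutputs,
        CDFTrackingDataSerializer,  # name still taken from the tracking skeleton
    )

    # `event_dataset` is assumed to be an EventDataset loaded elsewhere with
    # one of kloppy's event data loaders; this sketch only shows the output
    # wiring, not the final implementation.
    serializer = CDFTrackingDataSerializer()

    with tempfile.NamedTemporaryFile(mode="w+b", suffix=".jsonl") as event_file:
        outputs = CDFEventOutputs(event_data=event_file)
        serializer.serialize(event_dataset, outputs)  # returns True on success
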
--- .../infra/serializers/event/cdf/__init__.py | 0 .../infra/serializers/event/cdf/serializer.py | 409 ++++++++++++++++++ .../serializers/tracking/cdf/serializer.py | 27 +- kloppy/tests/test_cdf.py | 48 +- 4 files changed, 447 insertions(+), 37 deletions(-) create mode 100644 kloppy/infra/serializers/event/cdf/__init__.py create mode 100644 kloppy/infra/serializers/event/cdf/serializer.py diff --git a/kloppy/infra/serializers/event/cdf/__init__.py b/kloppy/infra/serializers/event/cdf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kloppy/infra/serializers/event/cdf/serializer.py b/kloppy/infra/serializers/event/cdf/serializer.py new file mode 100644 index 000000000..4e99fb737 --- /dev/null +++ b/kloppy/infra/serializers/event/cdf/serializer.py @@ -0,0 +1,409 @@ +import json +from datetime import timedelta +from typing import IO, NamedTuple + +from kloppy.domain import Provider, EventDataset, Time, PositionType +from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer + + +class CDFEventOutputs(NamedTuple): + event_data: IO[bytes] + + +class CDFTrackingDataSerializer(TrackingDataSerializer[CDFEventOutputs]): + provider = Provider.CDF + + # to infer the starting formation if not given + @staticmethod + def get_starting_formation(list_players, team) -> str: + formation = "" + defender = midfiler = attacker = 0 + + for player in list_players: + if ( + team.get_player_by_id(player["id"]).starting_position.parent + == None + ): + continue + elif ( + team.get_player_by_id(player["id"]).starting_position.parent + == PositionType.Attacker + ): + attacker += 1 + elif ( + team.get_player_by_id(player["id"]).starting_position.parent + == PositionType.Midfielder + or team.get_player_by_id( + player["id"] + ).starting_position.parent.parent + == PositionType.Midfielder + ): + midfiler += 1 + elif ( + team.get_player_by_id( + player["id"] + ).starting_position.parent.parent + == PositionType.Defender + ): + defender += 1 + if defender + midfiler + attacker == 10: + formation = f"{defender}_{midfiler}_{attacker}" + return formation + + def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: + """ + Serialize a TrackingDataset to Common Data Format. + + Args: + dataset: The tracking dataset to serialize + outputs: CDFOutputs containing file handles for metadata and tracking data + + Returns: + bool: True if serialization was successful, False otherwise + + Note: + TODO: Open question: should the serializer make sure the data is in the right format, and + do a transformation if not in the right format? yes normally. + """ + + # Normalize the coordinate system + # creating the coordinate system according to the CDF paper specifications. 
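+        # CDF conventions: origin at the pitch centre, y-axis oriented
+        # bottom-to-top, dimensions in metres, and a fixed home/away
+        # playing direction after the transform below.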
+ from kloppy.domain import ( + CustomCoordinateSystem, + Origin, + VerticalOrientation, + NormalizedPitchDimensions, + Dimension, + Orientation, + BallState, + ) + + # length and width of the pitch imported pitch + length = dataset.metadata.pitch_dimensions.pitch_length + width = dataset.metadata.pitch_dimensions.pitch_width + # build the cdf normalize coordinate system + CDF_coordinate_system = CustomCoordinateSystem( + origin=Origin.CENTER, + vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, + pitch_dimensions=NormalizedPitchDimensions( + x_dim=Dimension(min=-length / 2, max=length / 2), + y_dim=Dimension(min=-width / 2, max=width / 2), + pitch_length=length, + pitch_width=width, + ), + ) + # setting it as coordinate system of the imported data + dataset = dataset.transform( + to_coordinate_system=CDF_coordinate_system, + to_orientation=Orientation.STATIC_HOME_AWAY, + ) + ##-------------------------------------------------------------------------- + + ## building Tracking jsonl + # Output containers + metadata_json = {} + # tracking_datas = [] use if we want to manage all the frames + + # list of different periods within a game define by the cdf + periods = { + 1: "first_half", + 2: "second_half", + 3: "first_half_extratime", + 4: "second_half_extratime", + 5: "shootout", + } + + # Get home and away team data + home_team, away_team = dataset.metadata.teams + + # Get the players Id. + home_player_ids, away_player_ids = ( + [player.player_id for player in home_team.players], + [player.player_id for player in away_team.players], + ) + + for frame_id in range( + len([1]) + ): # change this when we would like to manage all the frames + frame_data = {} + + # Frame ID + frame_data["frame_id"] = frame_id + # Timestamp + frame_data["timestamp"] = str( + dataset.metadata.date + dataset[frame_id].timestamp + ) + # Period + frame_data["period"] = periods.get( + dataset[frame_id].period.id, "unknownn" + ) + # Match ID + frame_data["match"] = {"id": str(dataset.metadata.game_id)} + # Ball status + frame_data["ball_status"] = ( + dataset[0].ball_state == BallState.ALIVE + ) + + # Teams and players + home_players = [] + for player, coordinates in dataset[ + frame_id + ].players_coordinates.items(): + if player.player_id in home_player_ids: + try: + x = coordinates.x + y = coordinates.x + home_players.append( + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + "position": player.starting_position.code, + } + ) + except KeyError: + continue + + away_players = [] + for player, coordinates in dataset[ + frame_id + ].players_coordinates.items(): + if player.player_id in away_player_ids: + try: + x = coordinates.x + y = coordinates.x + away_players.append( + { + "id": player.player_id, + "x": round(x, 3), + "y": round(y, 3), + "position": player.starting_position.code, + } + ) + except KeyError: + continue + + # teams within the tracking data. 
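+            # Jersey colours are not available in the source data, so a
+            # placeholder string is written. The formation comes from
+            # team.formations.at_start() when formations are present,
+            # otherwise it is inferred from the players' starting positions
+            # via get_starting_formation().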
+ frame_data["teams"] = { + "home": { + "id": home_team.team_id, + "players": home_players, + "jersey_color": " ", # + "name": home_team.name, + "formation": ( + home_team.formations.at_start() + if home_team.formations.items + else self.get_starting_formation( + home_players, home_team + ) + ), + }, + "away": { + "id": away_team.team_id, + "players": away_players, + "jersey_color": " ", + "name": away_team.name, + "formation": ( + away_team.formations.at_start() + if away_team.formations.items + else self.get_starting_formation( + away_players, away_team + ) + ), + }, + } + + # Ball + if frame_data["ball_status"] == True: + try: + ball_x = round(dataset[frame_id].ball_coordinates.x, 3) + ball_y = round(dataset[frame_id].ball_coordinates.y, 3) + ball_z = round(dataset[frame_id].ball_coordinates.z, 3) + except KeyError: + ball_x = ball_y = ball_z = None + else: + ball_x = ball_y = ball_z = ( + dataset.metadata.pitch_dimensions.pitch_length + 10 + ) + + frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} + + # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs + # but with only one frame we just dumpit in a json buffured. + # Add to tracking list + outputs.tracking_data.write( + (json.dumps(frame_data) + "\n").encode("utf-8") + ) + + ################################################ + ### build now the metadata. + # Competition infos. + metadata_json["competition"] = ( + { # we don't have any of these informations + "id": "", + "name": "", + "format": "", + "age_restriction": "", + "type": "", + } + ) + + # season infos. + metadata_json["season"] = { # we don't have any of these informations + "id": "", + "name": "", + } + + # match infos. + periods_info = [] + for period in dataset.metadata.periods: + curent_period = { + "period": periods[period.id], + "play_direction": "left_right", + "start_time": str( + dataset.metadata.date + period.start_timestamp + ), + "end_time": str(dataset.metadata.date + period.end_timestamp), + "start_frame_id": ( + 0 + if period.id == 1 + else len( + dataset.filter( + lambda frame: frame.period.id == 1 + ).to_df() + ) + ), + "end_frame_id": ( + len( + dataset.filter( + lambda frame: frame.period.id == period.id + ).to_df() + ) + - 1 + if period.id == 1 + else len( + dataset.filter( + lambda frame: frame.period.id == 1 + ).to_df() + ) + + len( + dataset.filter( + lambda frame: frame.period.id == period.id + ).to_df() + ) + - 1 + ), + "left_team_id": home_team.team_id, + "right_team_id": away_team.team_id, + } + periods_info.append(curent_period) + + ## building team_players for metadata + meta_home_players = [] + starters_ids = [] + for player, coordinates in dataset[0].players_coordinates.items(): + starters_ids.append(player.player_id) + + for player in home_team.players: + try: + meta_home_players.append( + { + "id": player.player_id, + "team_id": home_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + meta_away_players = [] + for player in away_team.players: + try: + meta_away_players.append( + { + "id": player.player_id, + "team_id": away_team.team_id, + "jersey_number": player.jersey_no, + "is_starter": player.player_id in starters_ids, + } + ) + except KeyError: + continue + + # get whistles related to period directly from them. 
+ whistles = [] + for period in periods_info: + whistle_start = {} + whistle_end = {} + # type + whistle_start["type"] = period["period"] + whistle_end["type"] = period["period"] + # sub_type + whistle_start["sub_type"] = "start" + whistle_end["sub_type"] = "end" + # time + whistle_start["time"] = period["start_time"] + whistle_end["time"] = period["end_time"] + whistles.append(whistle_start) + whistles.append(whistle_end) + + metadata_json["match"] = { + "id": str(dataset.metadata.game_id), # same as for the jsonl + "kickoff_time": str( + dataset.metadata.date + + dataset.metadata.periods[0].start_timestamp + ), + "periods": periods_info, + "whistles": whistles, # fake just to pass the test, I have to change this after. + "round": "", + "scheduled_kickoff_time": str(dataset.metadata.date), + "local_kickoff_time": "", # how to get this ? + "misc": { + "country": "", # how to get this ? + "city": "", # how to get this ? + "percipitation": 0, # how to get this ? + "is_open_roof": True, # how to get this ? + }, + } + + metadata_json["teams"] = { + "home": { + "id": home_team.team_id, # same as for the jsonl + "players": meta_home_players, + "jersey_color": " ", + "name": home_team.name, + "formation": home_team.starting_formation + or self.get_starting_formation(meta_home_players, home_team), + }, + "away": { + "id": away_team.team_id, # same as for the jsonl + "players": meta_away_players, + "jersey_color": " ", + "name": away_team.name, + "formation": away_team.starting_formation + or self.get_starting_formation(meta_away_players, away_team), + }, + } + + metadata_json["stadium"] = { + "id": "", + "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, + "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, + "name": "", + "turf": "", + } + + metadata_json["meta"] = { + "video": None, + "tracking": None, + "limb": None, + "meta": None, + "cdf": None, + } + + outputs.meta_data.write( + (json.dumps(metadata_json) + "\n").encode("utf-8") + ) + + return True diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 770667b67..c59d10e1a 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -81,7 +81,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # length and width of the pitch imported pitch length = dataset.metadata.pitch_dimensions.pitch_length - width = dataset.metadata.pitch_dimensions.pitch_length + width = dataset.metadata.pitch_dimensions.pitch_width # build the cdf normalize coordinate system CDF_coordinate_system = CustomCoordinateSystem( origin=Origin.CENTER, @@ -178,14 +178,12 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "id": player.player_id, "x": round(x, 3), "y": round(y, 3), + "position": player.starting_position.code, } ) except KeyError: continue - # asumption - default_formation = "4-3-3" - # teams within the tracking data. 
frame_data["teams"] = { "home": { @@ -208,7 +206,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "name": away_team.name, "formation": ( away_team.formations.at_start() - if home_team.formations.items + if away_team.formations.items else self.get_starting_formation( away_players, away_team ) @@ -246,7 +244,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "id": "", "name": "", "format": "", - "age_restriction": "16", + "age_restriction": "", "type": "", } ) @@ -264,11 +262,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "period": periods[period.id], "play_direction": "left_right", "start_time": str( - dataset.metadata.date + period.start_time.timestamp - ), - "end_time": str( - dataset.metadata.date + period.end_time.timestamp + dataset.metadata.date + period.start_timestamp ), + "end_time": str(dataset.metadata.date + period.end_timestamp), "start_frame_id": ( 0 if period.id == 1 @@ -339,9 +335,11 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # get whistles related to period directly from them. whistles = [] for period in periods_info: - whistle_start = whistle_end = {} + whistle_start = {} + whistle_end = {} # type - whistle_start["type"] = whistle_end["type"] = period["period"] + whistle_start["type"] = period["period"] + whistle_end["type"] = period["period"] # sub_type whistle_start["sub_type"] = "start" whistle_end["sub_type"] = "end" @@ -353,7 +351,10 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: metadata_json["match"] = { "id": str(dataset.metadata.game_id), # same as for the jsonl - "kickoff_time": str(dataset.metadata.periods[0].start_time), + "kickoff_time": str( + dataset.metadata.date + + dataset.metadata.periods[0].start_timestamp + ), "periods": periods_info, "whistles": whistles, # fake just to pass the test, I have to change this after. "round": "", diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index bcb3eb13e..130b8ff63 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -23,32 +23,32 @@ def meta_data(self, base_dir) -> Path: @pytest.fixture def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" - # return sportec.load_tracking( - # raw_data=raw_data, - # meta_data=meta_data, - # coordinates="sportec", - # limit=None, - # only_alive=False, - # ) - - from kloppy import pff - - # Path to data - roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" - metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" - raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" - - # Loading - dataset = pff.load_tracking( - raw_data=raw_data_path, - meta_data=metadata_path, - roster_meta_data=roster_path, - coordinates="pff", - limit=10, # only ten frames even if we are just gona use one of them. 
- sample_rate=None, + return sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + limit=None, + only_alive=False, ) + + # from kloppy import pff + + # # Path to data + # roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" + # metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" + # raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" + + # # Loading + # dataset = pff.load_tracking( + # raw_data=raw_data_path, + # meta_data=metadata_path, + # roster_meta_data=roster_path, + # coordinates="pff", + # limit=10, # only ten frames even if we are just gona use one of them. + # sample_rate=None, + # ) - return dataset + # return dataset def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" From c941ea1186e79aa064887ccc710e6086c8d15891 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Tue, 30 Sep 2025 03:57:30 +0100 Subject: [PATCH 16/23] minor --- .../infra/serializers/event/cdf/__init__.py | 0 .../infra/serializers/event/cdf/serializer.py | 409 ------------------ 2 files changed, 409 deletions(-) delete mode 100644 kloppy/infra/serializers/event/cdf/__init__.py delete mode 100644 kloppy/infra/serializers/event/cdf/serializer.py diff --git a/kloppy/infra/serializers/event/cdf/__init__.py b/kloppy/infra/serializers/event/cdf/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/kloppy/infra/serializers/event/cdf/serializer.py b/kloppy/infra/serializers/event/cdf/serializer.py deleted file mode 100644 index 4e99fb737..000000000 --- a/kloppy/infra/serializers/event/cdf/serializer.py +++ /dev/null @@ -1,409 +0,0 @@ -import json -from datetime import timedelta -from typing import IO, NamedTuple - -from kloppy.domain import Provider, EventDataset, Time, PositionType -from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer - - -class CDFEventOutputs(NamedTuple): - event_data: IO[bytes] - - -class CDFTrackingDataSerializer(TrackingDataSerializer[CDFEventOutputs]): - provider = Provider.CDF - - # to infer the starting formation if not given - @staticmethod - def get_starting_formation(list_players, team) -> str: - formation = "" - defender = midfiler = attacker = 0 - - for player in list_players: - if ( - team.get_player_by_id(player["id"]).starting_position.parent - == None - ): - continue - elif ( - team.get_player_by_id(player["id"]).starting_position.parent - == PositionType.Attacker - ): - attacker += 1 - elif ( - team.get_player_by_id(player["id"]).starting_position.parent - == PositionType.Midfielder - or team.get_player_by_id( - player["id"] - ).starting_position.parent.parent - == PositionType.Midfielder - ): - midfiler += 1 - elif ( - team.get_player_by_id( - player["id"] - ).starting_position.parent.parent - == PositionType.Defender - ): - defender += 1 - if defender + midfiler + attacker == 10: - formation = f"{defender}_{midfiler}_{attacker}" - return formation - - def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: - """ - Serialize a TrackingDataset to Common Data Format. 
- - Args: - dataset: The tracking dataset to serialize - outputs: CDFOutputs containing file handles for metadata and tracking data - - Returns: - bool: True if serialization was successful, False otherwise - - Note: - TODO: Open question: should the serializer make sure the data is in the right format, and - do a transformation if not in the right format? yes normally. - """ - - # Normalize the coordinate system - # creating the coordinate system according to the CDF paper specifications. - from kloppy.domain import ( - CustomCoordinateSystem, - Origin, - VerticalOrientation, - NormalizedPitchDimensions, - Dimension, - Orientation, - BallState, - ) - - # length and width of the pitch imported pitch - length = dataset.metadata.pitch_dimensions.pitch_length - width = dataset.metadata.pitch_dimensions.pitch_width - # build the cdf normalize coordinate system - CDF_coordinate_system = CustomCoordinateSystem( - origin=Origin.CENTER, - vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, - pitch_dimensions=NormalizedPitchDimensions( - x_dim=Dimension(min=-length / 2, max=length / 2), - y_dim=Dimension(min=-width / 2, max=width / 2), - pitch_length=length, - pitch_width=width, - ), - ) - # setting it as coordinate system of the imported data - dataset = dataset.transform( - to_coordinate_system=CDF_coordinate_system, - to_orientation=Orientation.STATIC_HOME_AWAY, - ) - ##-------------------------------------------------------------------------- - - ## building Tracking jsonl - # Output containers - metadata_json = {} - # tracking_datas = [] use if we want to manage all the frames - - # list of different periods within a game define by the cdf - periods = { - 1: "first_half", - 2: "second_half", - 3: "first_half_extratime", - 4: "second_half_extratime", - 5: "shootout", - } - - # Get home and away team data - home_team, away_team = dataset.metadata.teams - - # Get the players Id. - home_player_ids, away_player_ids = ( - [player.player_id for player in home_team.players], - [player.player_id for player in away_team.players], - ) - - for frame_id in range( - len([1]) - ): # change this when we would like to manage all the frames - frame_data = {} - - # Frame ID - frame_data["frame_id"] = frame_id - # Timestamp - frame_data["timestamp"] = str( - dataset.metadata.date + dataset[frame_id].timestamp - ) - # Period - frame_data["period"] = periods.get( - dataset[frame_id].period.id, "unknownn" - ) - # Match ID - frame_data["match"] = {"id": str(dataset.metadata.game_id)} - # Ball status - frame_data["ball_status"] = ( - dataset[0].ball_state == BallState.ALIVE - ) - - # Teams and players - home_players = [] - for player, coordinates in dataset[ - frame_id - ].players_coordinates.items(): - if player.player_id in home_player_ids: - try: - x = coordinates.x - y = coordinates.x - home_players.append( - { - "id": player.player_id, - "x": round(x, 3), - "y": round(y, 3), - "position": player.starting_position.code, - } - ) - except KeyError: - continue - - away_players = [] - for player, coordinates in dataset[ - frame_id - ].players_coordinates.items(): - if player.player_id in away_player_ids: - try: - x = coordinates.x - y = coordinates.x - away_players.append( - { - "id": player.player_id, - "x": round(x, 3), - "y": round(y, 3), - "position": player.starting_position.code, - } - ) - except KeyError: - continue - - # teams within the tracking data. 
- frame_data["teams"] = { - "home": { - "id": home_team.team_id, - "players": home_players, - "jersey_color": " ", # - "name": home_team.name, - "formation": ( - home_team.formations.at_start() - if home_team.formations.items - else self.get_starting_formation( - home_players, home_team - ) - ), - }, - "away": { - "id": away_team.team_id, - "players": away_players, - "jersey_color": " ", - "name": away_team.name, - "formation": ( - away_team.formations.at_start() - if away_team.formations.items - else self.get_starting_formation( - away_players, away_team - ) - ), - }, - } - - # Ball - if frame_data["ball_status"] == True: - try: - ball_x = round(dataset[frame_id].ball_coordinates.x, 3) - ball_y = round(dataset[frame_id].ball_coordinates.y, 3) - ball_z = round(dataset[frame_id].ball_coordinates.z, 3) - except KeyError: - ball_x = ball_y = ball_z = None - else: - ball_x = ball_y = ball_z = ( - dataset.metadata.pitch_dimensions.pitch_length + 10 - ) - - frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} - - # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs - # but with only one frame we just dumpit in a json buffured. - # Add to tracking list - outputs.tracking_data.write( - (json.dumps(frame_data) + "\n").encode("utf-8") - ) - - ################################################ - ### build now the metadata. - # Competition infos. - metadata_json["competition"] = ( - { # we don't have any of these informations - "id": "", - "name": "", - "format": "", - "age_restriction": "", - "type": "", - } - ) - - # season infos. - metadata_json["season"] = { # we don't have any of these informations - "id": "", - "name": "", - } - - # match infos. - periods_info = [] - for period in dataset.metadata.periods: - curent_period = { - "period": periods[period.id], - "play_direction": "left_right", - "start_time": str( - dataset.metadata.date + period.start_timestamp - ), - "end_time": str(dataset.metadata.date + period.end_timestamp), - "start_frame_id": ( - 0 - if period.id == 1 - else len( - dataset.filter( - lambda frame: frame.period.id == 1 - ).to_df() - ) - ), - "end_frame_id": ( - len( - dataset.filter( - lambda frame: frame.period.id == period.id - ).to_df() - ) - - 1 - if period.id == 1 - else len( - dataset.filter( - lambda frame: frame.period.id == 1 - ).to_df() - ) - + len( - dataset.filter( - lambda frame: frame.period.id == period.id - ).to_df() - ) - - 1 - ), - "left_team_id": home_team.team_id, - "right_team_id": away_team.team_id, - } - periods_info.append(curent_period) - - ## building team_players for metadata - meta_home_players = [] - starters_ids = [] - for player, coordinates in dataset[0].players_coordinates.items(): - starters_ids.append(player.player_id) - - for player in home_team.players: - try: - meta_home_players.append( - { - "id": player.player_id, - "team_id": home_team.team_id, - "jersey_number": player.jersey_no, - "is_starter": player.player_id in starters_ids, - } - ) - except KeyError: - continue - - meta_away_players = [] - for player in away_team.players: - try: - meta_away_players.append( - { - "id": player.player_id, - "team_id": away_team.team_id, - "jersey_number": player.jersey_no, - "is_starter": player.player_id in starters_ids, - } - ) - except KeyError: - continue - - # get whistles related to period directly from them. 
- whistles = [] - for period in periods_info: - whistle_start = {} - whistle_end = {} - # type - whistle_start["type"] = period["period"] - whistle_end["type"] = period["period"] - # sub_type - whistle_start["sub_type"] = "start" - whistle_end["sub_type"] = "end" - # time - whistle_start["time"] = period["start_time"] - whistle_end["time"] = period["end_time"] - whistles.append(whistle_start) - whistles.append(whistle_end) - - metadata_json["match"] = { - "id": str(dataset.metadata.game_id), # same as for the jsonl - "kickoff_time": str( - dataset.metadata.date - + dataset.metadata.periods[0].start_timestamp - ), - "periods": periods_info, - "whistles": whistles, # fake just to pass the test, I have to change this after. - "round": "", - "scheduled_kickoff_time": str(dataset.metadata.date), - "local_kickoff_time": "", # how to get this ? - "misc": { - "country": "", # how to get this ? - "city": "", # how to get this ? - "percipitation": 0, # how to get this ? - "is_open_roof": True, # how to get this ? - }, - } - - metadata_json["teams"] = { - "home": { - "id": home_team.team_id, # same as for the jsonl - "players": meta_home_players, - "jersey_color": " ", - "name": home_team.name, - "formation": home_team.starting_formation - or self.get_starting_formation(meta_home_players, home_team), - }, - "away": { - "id": away_team.team_id, # same as for the jsonl - "players": meta_away_players, - "jersey_color": " ", - "name": away_team.name, - "formation": away_team.starting_formation - or self.get_starting_formation(meta_away_players, away_team), - }, - } - - metadata_json["stadium"] = { - "id": "", - "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, - "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, - "name": "", - "turf": "", - } - - metadata_json["meta"] = { - "video": None, - "tracking": None, - "limb": None, - "meta": None, - "cdf": None, - } - - outputs.meta_data.write( - (json.dumps(metadata_json) + "\n").encode("utf-8") - ) - - return True From 863529371ca2ec11c98acae0ad2ea907e3a6a9cd Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Sat, 4 Oct 2025 00:03:31 +0100 Subject: [PATCH 17/23] fixed the CDFcoordinateSystem, loop over the entire dataset and avoid any convertion with to_df() --- .../serializers/tracking/cdf/__init__.py | 3 + .../tracking/cdf/cdf_coordinate_system.py | 23 ++++ .../serializers/tracking/cdf/serializer.py | 128 +++++++----------- kloppy/tests/test_cdf.py | 99 +++++++------- 4 files changed, 124 insertions(+), 129 deletions(-) create mode 100644 kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py diff --git a/kloppy/infra/serializers/tracking/cdf/__init__.py b/kloppy/infra/serializers/tracking/cdf/__init__.py index e69de29bb..8c33fe043 100644 --- a/kloppy/infra/serializers/tracking/cdf/__init__.py +++ b/kloppy/infra/serializers/tracking/cdf/__init__.py @@ -0,0 +1,3 @@ +from .cdf_coordinate_system import CDFCoordinateSystem + +__all__ = ["CDFCoordinateSystem"] \ No newline at end of file diff --git a/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py b/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py new file mode 100644 index 000000000..4e624fde0 --- /dev/null +++ b/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py @@ -0,0 +1,23 @@ +from kloppy.domain import (CustomCoordinateSystem,NormalizedPitchDimensions,Dimension,VerticalOrientation,Origin) + +class CDFCoordinateSystem: + + + def __init__(self, dataset): + self.length = dataset.metadata.pitch_dimensions.pitch_length + self.width = 
dataset.metadata.pitch_dimensions.pitch_width + # build the cdf normalize coordinate system + self.coordinate_system = CustomCoordinateSystem( + origin=Origin.CENTER, + vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, + pitch_dimensions=NormalizedPitchDimensions( + x_dim=Dimension(min=-self.length / 2, max=self.length / 2), + y_dim=Dimension(min=-self.width / 2, max=self.width / 2), + pitch_length= self.length, + pitch_width= self.width, + ), + ) + + def get_coordinate_system(self): + """Return the built coordinate system.""" + return self.coordinate_system \ No newline at end of file diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index c59d10e1a..24221664b 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -1,5 +1,6 @@ import json from datetime import timedelta +import tempfile from typing import IO, NamedTuple from kloppy.domain import Provider, TrackingDataset, Time, PositionType @@ -8,7 +9,7 @@ class CDFOutputs(NamedTuple): meta_data: IO[bytes] - tracking_data: IO[bytes] + tracking_data: list[IO[bytes]] class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): @@ -70,32 +71,15 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # Normalize the coordinate system # creating the coordinate system according to the CDF paper specifications. from kloppy.domain import ( - CustomCoordinateSystem, - Origin, - VerticalOrientation, - NormalizedPitchDimensions, - Dimension, Orientation, BallState, ) + # builded class. + from . import CDFCoordinateSystem - # length and width of the pitch imported pitch - length = dataset.metadata.pitch_dimensions.pitch_length - width = dataset.metadata.pitch_dimensions.pitch_width - # build the cdf normalize coordinate system - CDF_coordinate_system = CustomCoordinateSystem( - origin=Origin.CENTER, - vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, - pitch_dimensions=NormalizedPitchDimensions( - x_dim=Dimension(min=-length / 2, max=length / 2), - y_dim=Dimension(min=-width / 2, max=width / 2), - pitch_length=length, - pitch_width=width, - ), - ) # setting it as coordinate system of the imported data dataset = dataset.transform( - to_coordinate_system=CDF_coordinate_system, + to_coordinate_system = CDFCoordinateSystem(dataset).get_coordinate_system(), to_orientation=Orientation.STATIC_HOME_AWAY, ) ##-------------------------------------------------------------------------- @@ -103,7 +87,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ## building Tracking jsonl # Output containers metadata_json = {} - # tracking_datas = [] use if we want to manage all the frames + tracking_jsonls = [] # list of different periods within a game define by the cdf periods = { @@ -114,6 +98,15 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: 5: "shootout", } + # store the number of frame in each period + nbr_frame_per_period = { + 1:0, + 2:0, + 3:0, + 4:0, + 5:0, + } + # Get home and away team data home_team, away_team = dataset.metadata.teams @@ -123,33 +116,36 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: [player.player_id for player in away_team.players], ) - for frame_id in range( - len([1]) - ): # change this when we would like to manage all the frames + frame_id = 0 # Use for the cdf_frame_ids.. 
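+        # Frames are re-numbered sequentially across the whole dataset so the
+        # CDF output gets contiguous frame ids; the provider's original id is
+        # kept alongside as "Original_frame_id".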
+ for frame in dataset.frames: # change this when we would like to manage all the frames frame_data = {} - # Frame ID + # Frame ID specified by the CDF frame_data["frame_id"] = frame_id + # Original frame_id + frame_data["Original_frame_id"] = frame.frame_id # Timestamp frame_data["timestamp"] = str( - dataset.metadata.date + dataset[frame_id].timestamp + dataset.metadata.date + frame.timestamp ) # Period frame_data["period"] = periods.get( - dataset[frame_id].period.id, "unknownn" + frame.period.id, "unknownn" ) + + # Update the number of frame for this period + nbr_frame_per_period[frame.period.id] = nbr_frame_per_period[frame.period.id] + 1 + # Match ID frame_data["match"] = {"id": str(dataset.metadata.game_id)} # Ball status frame_data["ball_status"] = ( - dataset[0].ball_state == BallState.ALIVE + frame.ball_state == BallState.ALIVE ) # Teams and players home_players = [] - for player, coordinates in dataset[ - frame_id - ].players_coordinates.items(): + for player, coordinates in frame.players_coordinates.items(): if player.player_id in home_player_ids: try: x = coordinates.x @@ -166,9 +162,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: continue away_players = [] - for player, coordinates in dataset[ - frame_id - ].players_coordinates.items(): + for player, coordinates in frame.players_coordinates.items(): if player.player_id in away_player_ids: try: x = coordinates.x @@ -215,33 +209,35 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: } # Ball - if frame_data["ball_status"] == True: + if frame_data["ball_status"] == True and frame.ball_coordinates is not None: try: - ball_x = round(dataset[frame_id].ball_coordinates.x, 3) - ball_y = round(dataset[frame_id].ball_coordinates.y, 3) - ball_z = round(dataset[frame_id].ball_coordinates.z, 3) + ball_x = round(frame.ball_coordinates.x, 3) + ball_y = round(frame.ball_coordinates.y, 3) + ball_z = round(frame.ball_coordinates.z, 3) except KeyError: ball_x = ball_y = ball_z = None else: - ball_x = ball_y = ball_z = ( - dataset.metadata.pitch_dimensions.pitch_length + 10 - ) + ball_x = ball_y = ball_z = 404 # default missing value for ball coordinates frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} - - # normally here when we will use all the frames we are suppose to add them successivelly to a list that we will then write as tracking data outputs - # but with only one frame we just dumpit in a json buffured. - # Add to tracking list - outputs.tracking_data.write( - (json.dumps(frame_data) + "\n").encode("utf-8") - ) + + # update the frame_id + frame_id += 1 + + # build a temporary jsonl for each frame + frame_file = tempfile.NamedTemporaryFile(mode="w+b", suffix=".jsonl", delete=False) + frame_file.write((json.dumps(frame_data) + "\n").encode("utf-8")) + frame_file.flush() # make sure data is written + + # Add to tracking list + outputs.tracking_data.append(frame_file) ################################################ ### build now the metadata. # Competition infos. metadata_json["competition"] = ( { # we don't have any of these informations - "id": "", + "id": "MISSING_MANDATORY_COMPETITION_ID", "name": "", "format": "", "age_restriction": "", @@ -251,7 +247,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # season infos. 
metadata_json["season"] = { # we don't have any of these informations - "id": "", + "id": "MISSING_MANDATORY_SEASON_ID", "name": "", } @@ -268,31 +264,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "start_frame_id": ( 0 if period.id == 1 - else len( - dataset.filter( - lambda frame: frame.period.id == 1 - ).to_df() - ) - ), - "end_frame_id": ( - len( - dataset.filter( - lambda frame: frame.period.id == period.id - ).to_df() - ) - - 1 - if period.id == 1 - else len( - dataset.filter( - lambda frame: frame.period.id == 1 - ).to_df() - ) - + len( - dataset.filter( - lambda frame: frame.period.id == period.id - ).to_df() - ) - - 1 + else sum([nbr_frame_per_period[i] for i in range(1,period.id)]) + ), # We should note that these are starting and end frame_id on the cdf not the original starting and end frame_id + "end_frame_id": ( nbr_frame_per_period[period.id]-1 if period.id == 1 else sum([nbr_frame_per_period[i] for i in range(1,(period.id +1))]) - 1 ), "left_team_id": home_team.team_id, "right_team_id": away_team.team_id, @@ -388,7 +362,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: } metadata_json["stadium"] = { - "id": "", + "id": "MISSING_MANDATORY_STADIUM_ID", "pitch_length": dataset.metadata.pitch_dimensions.pitch_length, "pitch_width": dataset.metadata.pitch_dimensions.pitch_width, "name": "", diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 130b8ff63..5e62b8e57 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -23,67 +23,62 @@ def meta_data(self, base_dir) -> Path: @pytest.fixture def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" - return sportec.load_tracking( - raw_data=raw_data, - meta_data=meta_data, - coordinates="sportec", - limit=None, - only_alive=False, - ) - - # from kloppy import pff - - # # Path to data - # roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" - # metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" - # raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" - - # # Loading - # dataset = pff.load_tracking( - # raw_data=raw_data_path, - # meta_data=metadata_path, - # roster_meta_data=roster_path, - # coordinates="pff", - # limit=10, # only ten frames even if we are just gona use one of them. - # sample_rate=None, + # return sportec.load_tracking( + # raw_data=raw_data, + # meta_data=meta_data, + # coordinates="sportec", + # limit=None, + # only_alive=False, # ) + + from kloppy import pff + + # Path to data + roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" + metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" + raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" + + # Loading + dataset = pff.load_tracking( + raw_data=raw_data_path, + meta_data=metadata_path, + roster_meta_data=roster_path, + coordinates="pff", + limit=30000, # only ten frames even if we are just gona use one of them. 
+ sample_rate=None, + ) - # return dataset + return dataset def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" serializer = CDFTrackingDataSerializer() - # Create temporary files with .jsonl extension for CDF validation - with tempfile.NamedTemporaryFile( - mode="w+b", suffix=".json", delete=False - ) as meta_file, tempfile.NamedTemporaryFile( - mode="w+b", suffix=".jsonl", delete=False - ) as tracking_file: + # Instantiate Validators + meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") + tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/schema/tracking_v0.2.0.json") + with tempfile.NamedTemporaryFile(mode="w+b", suffix=".json", delete=False) as meta_file: + # Initialize empty list for tracking files + tracking_files: list[tempfile._TemporaryFileWrapper] = [] + # Instantiate the named tuple for outputs outputs = CDFOutputs( - meta_data=meta_file, tracking_data=tracking_file + meta_data=meta_file, + tracking_data=tracking_files ) - - # Serialize the small Sportec dataset to CDF format + # Serialize the dataset success = serializer.serialize(dataset, outputs) assert success is True - - # Close files to ensure data is written - meta_file.close() - tracking_file.close() - - # Validate using CDF validators - - # Validate tracking data - tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/schema/tracking_v0.2.0.json") - tracking_validator.validate_schema(sample=tracking_file.name) - - # Validate meta data first. - meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") - meta_validator.validate_schema(sample=meta_file.name) - - - # Clean up temp files - Path(meta_file.name).unlink() - Path(tracking_file.name).unlink() + # Save paths for validation after leaving the block + meta_path = meta_file.name + tracking_paths = [f.name for f in outputs.tracking_data] + + # Validate metadata + meta_validator.validate_schema(sample=meta_path) + # Validate all tracking frame files + for path in tracking_paths: + tracking_validator.validate_schema(sample=path) + + Path(meta_path).unlink() + for path in tracking_paths: + Path(path).unlink() From 447a34a6fc2026ced422af31c21823d53916d880 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Tue, 7 Oct 2025 07:08:27 +0100 Subject: [PATCH 18/23] minor --- kloppy/infra/serializers/tracking/cdf/serializer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 24221664b..9d3be8e7c 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -20,7 +20,6 @@ class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): def get_starting_formation(list_players, team) -> str: formation = "" defender = midfiler = attacker = 0 - for player in list_players: if ( team.get_player_by_id(player["id"]).starting_position.parent @@ -87,7 +86,6 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ## building Tracking jsonl # Output containers metadata_json = {} - tracking_jsonls = [] # list of different periods within a game define by the cdf periods = { From 04e75557a9c8917848c95da40b79e264322a12c5 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Thu, 9 Oct 2025 14:08:55 +0100 Subject: [PATCH 19/23] update the start and end frame_id for each period , update the formation retrieving and get block on handling the class 
into kloppy.domain... --- kloppy/domain/models/common.py | 36 ++++ .../serializers/tracking/cdf/serializer.py | 194 +++++++++++++----- 2 files changed, 173 insertions(+), 57 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index 8ba58191a..fcb2f6933 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1185,6 +1185,41 @@ def pitch_dimensions(self) -> PitchDimensions: pitch_width=None, standardized=False, ) + + +class CDFCoordinateSystem(ProviderCoordinateSystem): + """ + CDFCoordinateSystem coordinate system. + + Uses a pitch with the origin at the center and the y-axis oriented + from bottom to top. The coordinates are in meters. + """ + + @property + def provider(self) -> Provider: + return Provider.CDF + + @property + def origin(self) -> Origin: + return Origin.CENTER + + @property + def vertical_orientation(self) -> VerticalOrientation: + return VerticalOrientation.BOTTOM_TO_TOP + + @property + def pitch_dimensions(self) -> PitchDimensions: + return NormalizedPitchDimensions( + x_dim=Dimension( + -1 * self._pitch_length / 2, self._pitch_length / 2 + ), + y_dim=Dimension( + -1 * self._pitch_width / 2, self._pitch_width / 2 + ), + pitch_length=self._pitch_length, + pitch_width=self._pitch_width, + standardized=False, + ) class SignalityCoordinateSystem(ProviderCoordinateSystem): @@ -1392,6 +1427,7 @@ def build_coordinate_system( Provider.HAWKEYE: HawkEyeCoordinateSystem, Provider.SPORTVU: SportVUCoordinateSystem, Provider.SIGNALITY: SignalityCoordinateSystem, + Provider.CDF: CDFCoordinateSystem, } if provider in coordinate_systems: diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 9d3be8e7c..42bea4183 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -17,38 +17,29 @@ class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): # to infer the starting formation if not given @staticmethod - def get_starting_formation(list_players, team) -> str: + def get_starting_formation(team_players) -> str: formation = "" - defender = midfiler = attacker = 0 - for player in list_players: - if ( - team.get_player_by_id(player["id"]).starting_position.parent - == None - ): + defender = midfielder = attacker = 0 + for player in team_players: + if player.starting_position.position_group == None: continue elif ( - team.get_player_by_id(player["id"]).starting_position.parent + player.starting_position.position_group == PositionType.Attacker ): attacker += 1 elif ( - team.get_player_by_id(player["id"]).starting_position.parent - == PositionType.Midfielder - or team.get_player_by_id( - player["id"] - ).starting_position.parent.parent + player.starting_position.position_group == PositionType.Midfielder ): - midfiler += 1 + midfielder += 1 elif ( - team.get_player_by_id( - player["id"] - ).starting_position.parent.parent + player.starting_position.position_group == PositionType.Defender ): defender += 1 - if defender + midfiler + attacker == 10: - formation = f"{defender}_{midfiler}_{attacker}" + if defender + midfielder + attacker == 10: + formation = f"{defender}-{midfielder}-{attacker}" return formation def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: @@ -73,12 +64,15 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: Orientation, BallState, ) + # builded class. - from . import CDFCoordinateSystem + from . 
import CDFCoordinateSystem # setting it as coordinate system of the imported data dataset = dataset.transform( - to_coordinate_system = CDFCoordinateSystem(dataset).get_coordinate_system(), + to_coordinate_system=CDFCoordinateSystem( + dataset + ).get_coordinate_system(), to_orientation=Orientation.STATIC_HOME_AWAY, ) ##-------------------------------------------------------------------------- @@ -96,15 +90,25 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: 5: "shootout", } - # store the number of frame in each period - nbr_frame_per_period = { - 1:0, - 2:0, - 3:0, - 4:0, - 5:0, + # container for stat and end frame_id + period_start_frame_id = { + period.id: None for period in dataset.metadata.periods + } + period_end_frame_id = { + period.id: None for period in dataset.metadata.periods + } + + # container for stat and end normalized frame_id + normalized_period_start_frame_id = { + period.id: None for period in dataset.metadata.periods + } + normalized_period_end_frame_id = { + period.id: None for period in dataset.metadata.periods } + # diffence of ids between frame_ids + period_offset = {period.id: 0 for period in dataset.metadata.periods} + # Get home and away team data home_team, away_team = dataset.metadata.teams @@ -114,8 +118,8 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: [player.player_id for player in away_team.players], ) - frame_id = 0 # Use for the cdf_frame_ids.. - for frame in dataset.frames: # change this when we would like to manage all the frames + frame_id = 0 # Use for the cdf_frame_ids.. + for frame in dataset.frames: frame_data = {} # Frame ID specified by the CDF @@ -127,19 +131,46 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: dataset.metadata.date + frame.timestamp ) # Period - frame_data["period"] = periods.get( - frame.period.id, "unknownn" - ) - - # Update the number of frame for this period - nbr_frame_per_period[frame.period.id] = nbr_frame_per_period[frame.period.id] + 1 + frame_data["period"] = periods.get(frame.period.id, "unknownn") + period_id = frame.period.id + # Update the start and end id for this period + if period_start_frame_id[period_id] is None: + period_start_frame_id[period_id] = frame_data[ + "Original_frame_id" + ] + + if ( + period_id > 1 + and period_end_frame_id[period_id - 1] is not None + ): + prev_period_length = ( + period_end_frame_id[period_id - 1] + - period_start_frame_id[period_id - 1] + + 1 + ) + period_offset[period_id] = ( + period_offset[period_id - 1] + prev_period_length + ) + + # Set normalized start frame id + normalized_period_start_frame_id[period_id] = period_offset[ + period_id + ] + + period_end_frame_id[period_id] = frame_data["Original_frame_id"] + + normalized_frame_id = ( + frame_data["Original_frame_id"] + - period_start_frame_id[period_id] + ) + period_offset[period_id] + + # Update normalized end frame id + normalized_period_end_frame_id[period_id] = normalized_frame_id # Match ID frame_data["match"] = {"id": str(dataset.metadata.game_id)} # Ball status - frame_data["ball_status"] = ( - frame.ball_state == BallState.ALIVE - ) + frame_data["ball_status"] = frame.ball_state == BallState.ALIVE # Teams and players home_players = [] @@ -177,6 +208,17 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: continue # teams within the tracking data. 
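+        # Collect the ids of the players actually present in this frame so the
+        # formation fallback only considers players on the pitch.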
+ + home_players_id = [] + away_players_id = [] + for player, _ in frame.players_coordinates.items(): + if player.team == home_team: + home_players_id.append(player.player_id) + if player.team == away_team: + away_players_id.append(player.player_id) + set_of_home_players_id_in_the_frame = set(home_players_id) + set_of_away_players_id_in_the_frame = set(away_players_id) + frame_data["teams"] = { "home": { "id": home_team.team_id, @@ -187,7 +229,12 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: home_team.formations.at_start() if home_team.formations.items else self.get_starting_formation( - home_players, home_team + [ + p + for p in home_team.players + if p.player_id + in set_of_home_players_id_in_the_frame + ] ) ), }, @@ -200,14 +247,22 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: away_team.formations.at_start() if away_team.formations.items else self.get_starting_formation( - away_players, away_team + [ + p + for p in away_team.players + if p.player_id + in set_of_away_players_id_in_the_frame + ] ) ), }, } # Ball - if frame_data["ball_status"] == True and frame.ball_coordinates is not None: + if ( + frame_data["ball_status"] == True + and frame.ball_coordinates is not None + ): try: ball_x = round(frame.ball_coordinates.x, 3) ball_y = round(frame.ball_coordinates.y, 3) @@ -215,19 +270,23 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: except KeyError: ball_x = ball_y = ball_z = None else: - ball_x = ball_y = ball_z = 404 # default missing value for ball coordinates + ball_x = ball_y = ball_z = ( + 404 # default missing value for ball coordinates + ) frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} - + # update the frame_id frame_id += 1 # build a temporary jsonl for each frame - frame_file = tempfile.NamedTemporaryFile(mode="w+b", suffix=".jsonl", delete=False) + frame_file = tempfile.NamedTemporaryFile( + mode="w+b", suffix=".jsonl", delete=False + ) frame_file.write((json.dumps(frame_data) + "\n").encode("utf-8")) frame_file.flush() # make sure data is written - - # Add to tracking list + + # Add to tracking list outputs.tracking_data.append(frame_file) ################################################ @@ -259,13 +318,8 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: dataset.metadata.date + period.start_timestamp ), "end_time": str(dataset.metadata.date + period.end_timestamp), - "start_frame_id": ( - 0 - if period.id == 1 - else sum([nbr_frame_per_period[i] for i in range(1,period.id)]) - ), # We should note that these are starting and end frame_id on the cdf not the original starting and end frame_id - "end_frame_id": ( nbr_frame_per_period[period.id]-1 if period.id == 1 else sum([nbr_frame_per_period[i] for i in range(1,(period.id +1))]) - 1 - ), + "start_frame_id": normalized_period_start_frame_id[period.id], + "end_frame_id": normalized_period_end_frame_id[period.id], "left_team_id": home_team.team_id, "right_team_id": away_team.team_id, } @@ -340,6 +394,18 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: }, } + home_players_id_in_meta = [] + away_players_id_in_meta = [] + for player, _ in dataset[0].players_coordinates.items(): + if player.team == home_team: + home_players_id_in_meta.append(player.player_id) + if player.team == away_team: + away_players_id_in_meta.append(player.player_id) + meta_set_of_home_players_id_in_the_frame = set(home_players_id_in_meta) + print(meta_set_of_home_players_id_in_the_frame) + 
meta_set_of_away_players_id_in_the_frame = set(away_players_id_in_meta) + print(meta_set_of_away_players_id_in_the_frame) + metadata_json["teams"] = { "home": { "id": home_team.team_id, # same as for the jsonl @@ -347,15 +413,29 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: "jersey_color": " ", "name": home_team.name, "formation": home_team.starting_formation - or self.get_starting_formation(meta_home_players, home_team), + or self.get_starting_formation( + [ + p + for p in home_team.players + if p.player_id + in meta_set_of_home_players_id_in_the_frame + ] + ), }, "away": { - "id": away_team.team_id, # same as for the jsonl + "id": away_team.team_id, "players": meta_away_players, "jersey_color": " ", "name": away_team.name, "formation": away_team.starting_formation - or self.get_starting_formation(meta_away_players, away_team), + or self.get_starting_formation( + [ + p + for p in away_team.players + if p.player_id + in meta_set_of_away_players_id_in_the_frame + ] + ), }, } From 84a75102c42cfe0c6360bdcb5f040e60165f395f Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Fri, 17 Oct 2025 09:37:54 +0100 Subject: [PATCH 20/23] fix changes --- kloppy/domain/models/common.py | 6 ++++- .../serializers/tracking/cdf/serializer.py | 25 ++++++++----------- kloppy/tests/test_cdf.py | 6 ++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/kloppy/domain/models/common.py b/kloppy/domain/models/common.py index fcb2f6933..caf7c868e 100644 --- a/kloppy/domain/models/common.py +++ b/kloppy/domain/models/common.py @@ -1216,10 +1216,14 @@ def pitch_dimensions(self) -> PitchDimensions: y_dim=Dimension( -1 * self._pitch_width / 2, self._pitch_width / 2 ), - pitch_length=self._pitch_length, + pitch_length = self._pitch_length, pitch_width=self._pitch_width, standardized=False, ) + + def __init__(self, base_coordinate_system: ProviderCoordinateSystem): + self._pitch_length = base_coordinate_system.pitch_dimensions.pitch_length + self._pitch_width = base_coordinate_system.pitch_dimensions.pitch_width class SignalityCoordinateSystem(ProviderCoordinateSystem): diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 42bea4183..a9d07d037 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -1,9 +1,8 @@ import json -from datetime import timedelta import tempfile from typing import IO, NamedTuple -from kloppy.domain import Provider, TrackingDataset, Time, PositionType +from kloppy.domain import Provider, TrackingDataset, PositionType from kloppy.infra.serializers.tracking.serializer import TrackingDataSerializer @@ -58,29 +57,24 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: do a transformation if not in the right format? yes normally. """ - # Normalize the coordinate system - # creating the coordinate system according to the CDF paper specifications. from kloppy.domain import ( Orientation, BallState, ) # builded class. - from . 
import CDFCoordinateSystem + from kloppy.domain.models.common import CDFCoordinateSystem # setting it as coordinate system of the imported data dataset = dataset.transform( to_coordinate_system=CDFCoordinateSystem( - dataset - ).get_coordinate_system(), + dataset.metadata.coordinate_system + ), to_orientation=Orientation.STATIC_HOME_AWAY, ) - ##-------------------------------------------------------------------------- + ##--------------------------------------------------------------------- ## building Tracking jsonl - # Output containers - metadata_json = {} - # list of different periods within a game define by the cdf periods = { 1: "first_half", @@ -289,8 +283,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # Add to tracking list outputs.tracking_data.append(frame_file) - ################################################ - ### build now the metadata. + ###################### build now the metadata. + # Output containers + metadata_json = {} # Competition infos. metadata_json["competition"] = ( { # we don't have any of these informations @@ -450,7 +445,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: metadata_json["meta"] = { "video": None, "tracking": None, - "limb": None, + "landmarks": None, "meta": None, "cdf": None, } @@ -459,4 +454,4 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: (json.dumps(metadata_json) + "\n").encode("utf-8") ) - return True + return True \ No newline at end of file diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 5e62b8e57..7a1135943 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -55,8 +55,8 @@ def test_produces_valid_cdf_output(self, dataset): serializer = CDFTrackingDataSerializer() # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/schema/meta_v0.2.0.json") - tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/schema/tracking_v0.2.0.json") + meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/v0.2.1/schema/meta.json") + tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/v0.2.1/schema/tracking.json") with tempfile.NamedTemporaryFile(mode="w+b", suffix=".json", delete=False) as meta_file: # Initialize empty list for tracking files @@ -81,4 +81,4 @@ def test_produces_valid_cdf_output(self, dataset): Path(meta_path).unlink() for path in tracking_paths: - Path(path).unlink() + Path(path).unlink() \ No newline at end of file From dcfb26f9e675547893148cdfecbb86b4615bb5b0 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Fri, 17 Oct 2025 10:20:58 +0100 Subject: [PATCH 21/23] minor --- .../serializers/tracking/cdf/__init__.py | 2 +- .../tracking/cdf/cdf_coordinate_system.py | 23 -------------- kloppy/tests/test_cdf.py | 31 ++++--------------- 3 files changed, 7 insertions(+), 49 deletions(-) delete mode 100644 kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py diff --git a/kloppy/infra/serializers/tracking/cdf/__init__.py b/kloppy/infra/serializers/tracking/cdf/__init__.py index 8c33fe043..21944af32 100644 --- a/kloppy/infra/serializers/tracking/cdf/__init__.py +++ b/kloppy/infra/serializers/tracking/cdf/__init__.py @@ -1,3 +1,3 @@ -from .cdf_coordinate_system import CDFCoordinateSystem +from kloppy.domain.models.common import CDFCoordinateSystem __all__ = ["CDFCoordinateSystem"] \ No newline at end of file diff --git a/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py 
b/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py deleted file mode 100644 index 4e624fde0..000000000 --- a/kloppy/infra/serializers/tracking/cdf/cdf_coordinate_system.py +++ /dev/null @@ -1,23 +0,0 @@ -from kloppy.domain import (CustomCoordinateSystem,NormalizedPitchDimensions,Dimension,VerticalOrientation,Origin) - -class CDFCoordinateSystem: - - - def __init__(self, dataset): - self.length = dataset.metadata.pitch_dimensions.pitch_length - self.width = dataset.metadata.pitch_dimensions.pitch_width - # build the cdf normalize coordinate system - self.coordinate_system = CustomCoordinateSystem( - origin=Origin.CENTER, - vertical_orientation=VerticalOrientation.BOTTOM_TO_TOP, - pitch_dimensions=NormalizedPitchDimensions( - x_dim=Dimension(min=-self.length / 2, max=self.length / 2), - y_dim=Dimension(min=-self.width / 2, max=self.width / 2), - pitch_length= self.length, - pitch_width= self.width, - ), - ) - - def get_coordinate_system(self): - """Return the built coordinate system.""" - return self.coordinate_system \ No newline at end of file diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index 7a1135943..f0711d763 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -23,33 +23,14 @@ def meta_data(self, base_dir) -> Path: @pytest.fixture def dataset(self, raw_data: Path, meta_data: Path) -> TrackingDataset: """Load a small Sportec tracking data snippet for testing CDF serialization.""" - # return sportec.load_tracking( - # raw_data=raw_data, - # meta_data=meta_data, - # coordinates="sportec", - # limit=None, - # only_alive=False, - # ) - - from kloppy import pff - - # Path to data - roster_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_roster.json" - metadata_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812_metadata.json" - raw_data_path = "/home/student/Documents/AIMS/Intership/pysport/pysport-aims/first_week/data/3812/3812.jsonl.bz2" - - # Loading - dataset = pff.load_tracking( - raw_data=raw_data_path, - meta_data=metadata_path, - roster_meta_data=roster_path, - coordinates="pff", - limit=30000, # only ten frames even if we are just gona use one of them. - sample_rate=None, + return sportec.load_tracking( + raw_data=raw_data, + meta_data=meta_data, + coordinates="sportec", + limit=None, + only_alive=False, ) - return dataset - def test_produces_valid_cdf_output(self, dataset): """Test that CDFTrackingDataSerializer produces valid CDF output.""" serializer = CDFTrackingDataSerializer() From 5a2f293c8934f93172e5b727a25df400b01d2dd5 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Thu, 23 Oct 2025 17:11:02 +0100 Subject: [PATCH 22/23] --- .../serializers/tracking/cdf/serializer.py | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index a9d07d037..763ddd8a2 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -62,7 +62,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: BallState, ) - # builded class. + # builded coordinateSystem class. 
from kloppy.domain.models.common import CDFCoordinateSystem # setting it as coordinate system of the imported data @@ -72,7 +72,6 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ), to_orientation=Orientation.STATIC_HOME_AWAY, ) - ##--------------------------------------------------------------------- ## building Tracking jsonl # list of different periods within a game define by the cdf @@ -84,7 +83,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: 5: "shootout", } - # container for stat and end frame_id + # container for start and end frame_id period_start_frame_id = { period.id: None for period in dataset.metadata.periods } @@ -92,7 +91,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: period.id: None for period in dataset.metadata.periods } - # container for stat and end normalized frame_id + # container for start and end normalized frame_id normalized_period_start_frame_id = { period.id: None for period in dataset.metadata.periods } @@ -115,7 +114,6 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: frame_id = 0 # Use for the cdf_frame_ids.. for frame in dataset.frames: frame_data = {} - # Frame ID specified by the CDF frame_data["frame_id"] = frame_id # Original frame_id @@ -288,7 +286,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: metadata_json = {} # Competition infos. metadata_json["competition"] = ( - { # we don't have any of these informations + { "id": "MISSING_MANDATORY_COMPETITION_ID", "name": "", "format": "", @@ -298,7 +296,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: ) # season infos. - metadata_json["season"] = { # we don't have any of these informations + metadata_json["season"] = { "id": "MISSING_MANDATORY_SEASON_ID", "name": "", } @@ -371,21 +369,21 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: whistles.append(whistle_end) metadata_json["match"] = { - "id": str(dataset.metadata.game_id), # same as for the jsonl + "id": str(dataset.metadata.game_id), "kickoff_time": str( dataset.metadata.date + dataset.metadata.periods[0].start_timestamp ), "periods": periods_info, - "whistles": whistles, # fake just to pass the test, I have to change this after. + "whistles": whistles, "round": "", "scheduled_kickoff_time": str(dataset.metadata.date), - "local_kickoff_time": "", # how to get this ? + "local_kickoff_time": "", "misc": { - "country": "", # how to get this ? - "city": "", # how to get this ? - "percipitation": 0, # how to get this ? - "is_open_roof": True, # how to get this ? + "country": "", + "city": "", + "percipitation": 0, + "is_open_roof": True, # Asume as default value }, } @@ -397,13 +395,11 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: if player.team == away_team: away_players_id_in_meta.append(player.player_id) meta_set_of_home_players_id_in_the_frame = set(home_players_id_in_meta) - print(meta_set_of_home_players_id_in_the_frame) meta_set_of_away_players_id_in_the_frame = set(away_players_id_in_meta) - print(meta_set_of_away_players_id_in_the_frame) metadata_json["teams"] = { "home": { - "id": home_team.team_id, # same as for the jsonl + "id": home_team.team_id, "players": meta_home_players, "jersey_color": " ", "name": home_team.name, From 91d92ad07598df52efa7e77a87dc501c589ef310 Mon Sep 17 00:00:00 2001 From: stephTchembeu Date: Fri, 24 Oct 2025 04:29:55 +0100 Subject: [PATCH 23/23] fixed all the comments. 
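
A minimal usage sketch of the serializer after this series, for reference. The
fixture paths and the plain list passed as CDFOutputs.tracking_data are
assumptions taken from the test and from the serializer's
outputs.tracking_data.append(...) calls, not a settled public API:

    import tempfile

    from kloppy import sportec
    from kloppy.infra.serializers.tracking.cdf.serializer import (
        CDFOutputs,
        CDFTrackingDataSerializer,
    )

    # Load a small Sportec tracking snippet (assumed to be the same fixtures
    # the test suite ships in kloppy/tests/files).
    dataset = sportec.load_tracking(
        raw_data="kloppy/tests/files/sportec_positional.xml",
        meta_data="kloppy/tests/files/sportec_meta.xml",
        coordinates="sportec",
        only_alive=False,
    )

    # The serializer writes the metadata to a single JSON handle and appends
    # one temporary .jsonl file per frame to the tracking_data list.
    meta_file = tempfile.NamedTemporaryFile(
        mode="w+b", suffix=".json", delete=False
    )
    outputs = CDFOutputs(meta_data=meta_file, tracking_data=[])

    serializer = CDFTrackingDataSerializer()
    assert serializer.serialize(dataset, outputs) is True

Schema validation against the cdf package's validators is exercised by the test
changes below.
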
--- .../serializers/tracking/cdf/serializer.py | 53 ++++++++++--------- kloppy/tests/test_cdf.py | 5 +- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/kloppy/infra/serializers/tracking/cdf/serializer.py b/kloppy/infra/serializers/tracking/cdf/serializer.py index 763ddd8a2..4c682b4ef 100644 --- a/kloppy/infra/serializers/tracking/cdf/serializer.py +++ b/kloppy/infra/serializers/tracking/cdf/serializer.py @@ -17,6 +17,15 @@ class CDFTrackingDataSerializer(TrackingDataSerializer[CDFOutputs]): # to infer the starting formation if not given @staticmethod def get_starting_formation(team_players) -> str: + """ + determine the starting formation if not define. + + Args: + team: The team on which we want to infer the formation. + + Returns: + formation: the infered formation. + """ formation = "" defender = midfielder = attacker = 0 for player in team_players: @@ -51,10 +60,6 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: Returns: bool: True if serialization was successful, False otherwise - - Note: - TODO: Open question: should the serializer make sure the data is in the right format, and - do a transformation if not in the right format? yes normally. """ from kloppy.domain import ( @@ -262,9 +267,9 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: except KeyError: ball_x = ball_y = ball_z = None else: - ball_x = ball_y = ball_z = ( - 404 # default missing value for ball coordinates - ) + ball_x = ( + ball_y + ) = ball_z = 404 # default missing value for ball coordinates frame_data["ball"] = {"x": ball_x, "y": ball_y, "z": ball_z} @@ -285,18 +290,16 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: # Output containers metadata_json = {} # Competition infos. - metadata_json["competition"] = ( - { - "id": "MISSING_MANDATORY_COMPETITION_ID", - "name": "", - "format": "", - "age_restriction": "", - "type": "", - } - ) + metadata_json["competition"] = { + "id": "MISSING_MANDATORY_COMPETITION_ID", + "name": "", + "format": "", + "age_restriction": "", + "type": "", + } # season infos. 
- metadata_json["season"] = { + metadata_json["season"] = { "id": "MISSING_MANDATORY_SEASON_ID", "name": "", } @@ -369,20 +372,20 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: whistles.append(whistle_end) metadata_json["match"] = { - "id": str(dataset.metadata.game_id), + "id": str(dataset.metadata.game_id), "kickoff_time": str( dataset.metadata.date + dataset.metadata.periods[0].start_timestamp ), "periods": periods_info, - "whistles": whistles, + "whistles": whistles, "round": "", "scheduled_kickoff_time": str(dataset.metadata.date), - "local_kickoff_time": "", + "local_kickoff_time": "", "misc": { - "country": "", - "city": "", - "percipitation": 0, + "country": "", + "city": "", + "percipitation": 0, "is_open_roof": True, # Asume as default value }, } @@ -399,7 +402,7 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: metadata_json["teams"] = { "home": { - "id": home_team.team_id, + "id": home_team.team_id, "players": meta_home_players, "jersey_color": " ", "name": home_team.name, @@ -450,4 +453,4 @@ def serialize(self, dataset: TrackingDataset, outputs: CDFOutputs) -> bool: (json.dumps(metadata_json) + "\n").encode("utf-8") ) - return True \ No newline at end of file + return True diff --git a/kloppy/tests/test_cdf.py b/kloppy/tests/test_cdf.py index f0711d763..ea7525656 100644 --- a/kloppy/tests/test_cdf.py +++ b/kloppy/tests/test_cdf.py @@ -3,6 +3,7 @@ import pytest import cdf +from cdf import VERSION from kloppy import sportec from kloppy.domain import TrackingDataset @@ -36,8 +37,8 @@ def test_produces_valid_cdf_output(self, dataset): serializer = CDFTrackingDataSerializer() # Instantiate Validators - meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/v0.2.1/schema/meta.json") - tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/v0.2.1/schema/tracking.json") + meta_validator = cdf.MetaSchemaValidator(schema="cdf/files/v{cdf.VERSION}/schema/meta.json") + tracking_validator = cdf.TrackingSchemaValidator(schema="cdf/files/v{cdf.VERSION}/schema/tracking.json") with tempfile.NamedTemporaryFile(mode="w+b", suffix=".json", delete=False) as meta_file: # Initialize empty list for tracking files
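             # The serializer creates one temporary .jsonl file per frame and
             # appends each open handle to this list via outputs.tracking_data.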