getml · Urfoex · Sep 17, 2025 · Sep 2, 2025
diff --git a/src/getml_io/getml/metadatas.py b/src/getml_io/getml/metadatas.py
@@ -0,0 +1,13 @@
+from pydantic import BaseModel
+
+from getml_io.getml.roles import Roles
+
+
+class DataFrameMetaData(BaseModel, frozen=True):  # Frozen metadata of one data frame.
+    name: str  # Name copied from the getML metadata entry.
+    roles: Roles  # Serialized column roles of that data frame.
+
+
+class PipelineMetaData(BaseModel, frozen=True):  # Frozen metadata of a whole pipeline.
+    population: DataFrameMetaData | None  # None when the pipeline has no metadata.
+    peripheral: list[DataFrameMetaData]  # One entry per peripheral; may be empty.
diff --git a/src/getml_io/metadata/pipeline_information.py b/src/getml_io/metadata/pipeline_information.py
@@ -12,6 +12,7 @@
 from getml_io.getml.columns import Column
 from getml_io.getml.feature_learning import FeatureLearner
 from getml_io.getml.features import Features
+from getml_io.getml.metadatas import PipelineMetaData
 from getml_io.getml.predictors import FeatureSelector, Predictor
 from getml_io.getml.preprocessors import Preprocessor
 from getml_io.getml.scores import Scores
@@ -45,5 +46,5 @@ class PipelineInformation(BaseModel, frozen=True):
     features: Features
     scores: Scores
     columns: Sequence[Column]
-    # metadata # TODO @urfoex: #51
+    metadata: PipelineMetaData
     # tables # TODO @urfoex: #52
diff --git a/src/getml_io/serialize/data_model.py b/src/getml_io/serialize/data_model.py
@@ -19,15 +19,23 @@ def serialize_data_model(data_model: DataModel) -> DataModelInformation:
         DataModelInformation: The serialized DataModel information.
 
     """
+    peripheral = {
+        name: [
+            serialize_placeholder(placeholder)
+            for placeholder in (
+                [placeholders]
+                if isinstance(placeholders, Placeholder)
+                else placeholders
+            )
+        ]
+        for name, placeholders in cast(
+            "dict[str, Placeholder | list[Placeholder]]",
+            data_model.peripheral,
+        ).items()
+    }
     return DataModelInformation(
         population=serialize_placeholder(
             data_model.population,
         ),
-        peripheral={
-            name: [serialize_placeholder(placeholder) for placeholder in placeholders]
-            for name, placeholders in cast(
-                "dict[str, list[Placeholder]]",
-                cast("object", data_model.peripheral),
-            ).items()
-        },
+        peripheral=peripheral,
     )
diff --git a/src/getml_io/serialize/pipeline.py b/src/getml_io/serialize/pipeline.py
@@ -19,6 +19,8 @@
 from getml.pipeline import Pipeline
 from getml.pipeline import Scores as GetMLScores
 from getml.pipeline.column import Column as GetMLColumn
+from getml.pipeline.metadata import AllMetadata
+from getml.pipeline.metadata import Metadata as GetMLMetadata
 from getml.pipeline.score import ClassificationScore as GetMLClassificationScore
 from getml.pipeline.score import RegressionScore as GetMLRegressionScore
 from getml.pipeline.score import Score as GetMLScore
@@ -34,6 +36,10 @@
     RelMT,
 )
 from getml_io.getml.features import Feature, Features
+from getml_io.getml.metadatas import (
+    DataFrameMetaData,
+    PipelineMetaData,
+)
 from getml_io.getml.predictors import (
     LinearRegression,
     LogisticRegression,
@@ -69,6 +75,7 @@
 )
 from getml_io.serialize.pipeline_information import serialize_pipeline_information
 from getml_io.serialize.placeholder import serialize_placeholder
+from getml_io.serialize.roles import serialize_roles
 from getml_io.utils.convert import (
     assume_is_dict_str_to_dataframe_or_view,
 )
@@ -141,7 +148,7 @@ def serialize_pipeline(
         features=serialize_features(pipeline.features),
         scores=serialize_scores(pipeline.scores),
         columns=serialize_columns(pipeline.columns),
-        # metadata # TODO @urfoex: #51
+        metadata=serialize_all_metadata(pipeline.metadata),
         # tables # TODO @urfoex: #52
     )
     pipeline_information_json_path = serialize_pipeline_information(
@@ -437,3 +444,31 @@ def serialize_columns(getml_columns: GetMLColumns | None) -> list[Column]:
         )
         for column in columns
     ]
+
+
+def serialize_all_metadata(all_metadata: AllMetadata | None) -> PipelineMetaData:
+    """Serialize getML AllMetadata into a PipelineMetaData object.
+
+    Args:
+        all_metadata: The getML AllMetadata to serialize; may be None.
+
+    Returns:
+        PipelineMetaData: The serialized metadata, or an empty one for None.
+
+    """
+    if all_metadata is None:
+        return PipelineMetaData(population=None, peripheral=[])  # no metadata given
+
+    return PipelineMetaData(
+        population=_serialize_metadata(all_metadata.population),
+        peripheral=[  # keeps the order of all_metadata.peripheral
+            _serialize_metadata(metadata) for metadata in all_metadata.peripheral
+        ],
+    )
+
+
+def _serialize_metadata(metadata: GetMLMetadata) -> DataFrameMetaData:
+    return DataFrameMetaData(  # Map one getML Metadata onto the getml_io model.
+        name=metadata.name,
+        roles=serialize_roles(metadata.roles),  # roles go through the shared helper
+    )
diff --git a/tests/integration/data/loans/expected.pipeline.json b/tests/integration/data/loans/expected.pipeline.json
@@ -631,11 +631,11 @@
   "feature_learners": [
     {
       "aggregation": [
-        "AVG",
-        "MAX",
+        "MIN",
         "COUNT",
         "SUM",
-        "MIN"
+        "MAX",
+        "AVG"
       ],
       "allow_sets": true,
       "delta_t": 0.0,
@@ -649,19 +649,19 @@
       "num_threads": 0,
       "propositionalization": {
         "aggregation": [
-          "MODE",
-          "COUNT MINUS COUNT DISTINCT",
           "STDDEV",
-          "FIRST",
+          "MIN",
+          "COUNT DISTINCT",
           "COUNT",
-          "AVG",
           "MEDIAN",
-          "SUM",
           "MAX",
+          "SUM",
+          "FIRST",
           "LAST",
-          "COUNT DISTINCT",
           "TREND",
-          "MIN"
+          "AVG",
+          "COUNT MINUS COUNT DISTINCT",
+          "MODE"
         ],
         "delta_t": 0.0,
         "loss_function": "CrossEntropyLoss",
@@ -1236,5 +1236,134 @@
       "target": "default",
       "importance": 0.16812257116305224
     }
-  ]
+  ],
+  "metadata": {
+    "population": {
+      "name": "train",
+      "roles": {
+        "categorical": [
+          "frequency"
+        ],
+        "join_key": [
+          "account_id"
+        ],
+        "numerical": [
+          "duration",
+          "payments",
+          "amount"
+        ],
+        "target": [
+          "default"
+        ],
+        "text": [],
+        "time_stamp": [
+          "date_loan"
+        ],
+        "unused_float": [
+          "loan_id",
+          "district_id"
+        ],
+        "unused_string": [
+          "date_account",
+          "status"
+        ]
+      }
+    },
+    "peripheral": [
+      {
+        "name": "meta",
+        "roles": {
+          "categorical": [
+            "type_disp",
+            "type_card",
+            "gender",
+            "A3"
+          ],
+          "join_key": [
+            "account_id"
+          ],
+          "numerical": [
+            "A4",
+            "A5",
+            "A6",
+            "A7",
+            "A8",
+            "A9",
+            "A10",
+            "A11",
+            "A12",
+            "A13",
+            "A14",
+            "A15",
+            "A16"
+          ],
+          "target": [],
+          "text": [],
+          "time_stamp": [],
+          "unused_float": [
+            "disp_id",
+            "client_id",
+            "card_id",
+            "district_id"
+          ],
+          "unused_string": [
+            "issued",
+            "birth_date",
+            "A2"
+          ]
+        }
+      },
+      {
+        "name": "order",
+        "roles": {
+          "categorical": [
+            "bank_to",
+            "k_symbol"
+          ],
+          "join_key": [
+            "account_id"
+          ],
+          "numerical": [
+            "amount"
+          ],
+          "target": [],
+          "text": [],
+          "time_stamp": [],
+          "unused_float": [
+            "account_to",
+            "order_id"
+          ],
+          "unused_string": []
+        }
+      },
+      {
+        "name": "trans",
+        "roles": {
+          "categorical": [
+            "type",
+            "k_symbol",
+            "bank",
+            "operation"
+          ],
+          "join_key": [
+            "account_id"
+          ],
+          "numerical": [
+            "amount",
+            "balance"
+          ],
+          "target": [],
+          "text": [],
+          "time_stamp": [
+            "date"
+          ],
+          "unused_float": [
+            "trans_id",
+            "account"
+          ],
+          "unused_string": []
+        }
+      }
+    ]
+  }
 }
diff --git a/tests/integration/data/numerical/expected.pipeline.json b/tests/integration/data/numerical/expected.pipeline.json
@@ -570,19 +570,19 @@
       "num_threads": 0,
       "propositionalization": {
         "aggregation": [
-          "MODE",
-          "COUNT MINUS COUNT DISTINCT",
           "STDDEV",
-          "FIRST",
+          "MIN",
+          "COUNT DISTINCT",
           "COUNT",
-          "AVG",
           "MEDIAN",
-          "SUM",
           "MAX",
+          "SUM",
+          "FIRST",
           "LAST",
-          "COUNT DISTINCT",
           "TREND",
-          "MIN"
+          "AVG",
+          "COUNT MINUS COUNT DISTINCT",
+          "MODE"
         ],
         "delta_t": 0.0,
         "loss_function": "SquareLoss",
@@ -871,5 +871,49 @@
       "target": "targets",
       "importance": 0.7610534571004377
     }
-  ]
+  ],
+  "metadata": {
+    "population": {
+      "name": "train",
+      "roles": {
+        "categorical": [],
+        "join_key": [
+          "join_key"
+        ],
+        "numerical": [
+          "column_01"
+        ],
+        "target": [
+          "targets"
+        ],
+        "text": [],
+        "time_stamp": [
+          "time_stamp"
+        ],
+        "unused_float": [],
+        "unused_string": []
+      }
+    },
+    "peripheral": [
+      {
+        "name": "perph",
+        "roles": {
+          "categorical": [],
+          "join_key": [
+            "join_key"
+          ],
+          "numerical": [
+            "column_01"
+          ],
+          "target": [],
+          "text": [],
+          "time_stamp": [
+            "time_stamp"
+          ],
+          "unused_float": [],
+          "unused_string": []
+        }
+      }
+    ]
+  }
 }