From f999ab7b922bb12bd0bfbc98530556aebb3bfff8 Mon Sep 17 00:00:00 2001
From: "M. A. Kowalski" <mak60@cam.ac.uk>
Date: Mon, 16 Jun 2025 16:49:40 +0100
Subject: [PATCH] feat: change output format to CSV

Closes #35

Writes a set of CSV files to a folder instead of 'pickle'ing the output
data class. This is more human-readable and should make it easier for
post-processing scripts to read the data.

The drawback is that if new members of an unsupported type are introduced
they will not be written (a warning is raised in that case), and adding
an extra case to the export functions might be necessary.
---
 src/dementpy.py       |  2 +-
 src/initialization.py | 52 +++++++++++++++++++++++++++++++++++++++-
 src/output.py         | 55 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/src/dementpy.py b/src/dementpy.py
index c04f8b6..8447bb5 100644
--- a/src/dementpy.py
+++ b/src/dementpy.py
@@ -101,6 +101,6 @@ def main():
     
     #...export the Output_init object to the output_folder using the export() funtion in the utility module 
     os.chdir('../'+output_folder)
-    export(Output_init, site, outname)
+    Output_init.export(outname)
     
 main()
\ No newline at end of file
diff --git a/src/initialization.py b/src/initialization.py
index e0da301..ed6338f 100644
--- a/src/initialization.py
+++ b/src/initialization.py
@@ -6,6 +6,10 @@
 import pandas as pd
 import numpy as np
 
+import warnings
+import numbers
+from pathlib import Path
+
 from substrate import Substrate
 from monomer   import Monomer
 from enzyme    import Enzyme
@@ -163,4 +167,50 @@ def initialize_data(runtime_parameters, site):
         'Psi':  daily_psi                                           # water potential
     }
 
-    return Data_Dictionary
\ No newline at end of file
+    return Data_Dictionary
+
+
+def export_initialization_dict(base_path: Path | str, d: dict) -> None:
+    """Export contents of the initialisation dictionary to a folder.
+
+    Writes each item of one of the types below to '<base_path>/<key>.csv':
+      - pandas.DataFrame
+      - pandas.Series
+      - numpy.ndarray of rank 2 or lower (rank 0 is written as a single value)
+    All scalar numbers are grouped in a single CSV 'scalars.csv' file.
+
+    `base_path` is created if missing; `d` maps member names to their values.
+
+    Note:
+        All other items are ignored following a warning!
+        If you need them written you need to add an extra case below.
+    """
+    # Create space for output
+    base_path = Path(base_path)
+    base_path.mkdir(parents=True, exist_ok=True)
+
+    # Collect all scalar numbers; they are written together at the end
+    scalar_numbers = dict()
+
+    for name, member in d.items():
+        if isinstance(member, (pd.DataFrame, pd.Series)):
+            member.to_csv(base_path / f"{name}.csv")
+        elif isinstance(member, np.ndarray):
+            if member.ndim <= 2:
+                # Write under base_path (not the CWD); savetxt rejects rank-0 input
+                np.savetxt(base_path / f"{name}.csv", np.atleast_1d(member), delimiter=",")
+            else:
+                warnings.warn(
+                    f"Member '{name}' of initialisation dictionary could not be saved since "
+                    f"it is an array of rank higher than 2 (rank: {len(member.shape)})."
+                )
+        elif isinstance(member, numbers.Number):
+            scalar_numbers[name] = member
+        else:
+            warnings.warn(
+                f"Initialisation member '{name}' has unsupported type '{type(member)}'. "
+                f"It has not been exported to the output directory '{base_path}'."
+            )
+
+    # Write all scalars to a single file
+    pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv")
diff --git a/src/output.py b/src/output.py
index ac555f2..5971c5c 100644
--- a/src/output.py
+++ b/src/output.py
@@ -1,6 +1,12 @@
 # output.py module dealing with outputs of DEMENTpy.
 # Bin Wang, January, 2020
 
+from pathlib import Path
+import warnings
+import numbers
+
+from initialization import export_initialization_dict
+
 import numpy as np
 import pandas as pd
 
@@ -293,3 +299,52 @@ def microbes_tradeoff(self, ecosystem, year, day):
         GY_grid = ecosystem.Microbe_C_Gain.groupby(level=0,sort=False).sum()
         GY_grid.name = self.cycle*year + (day+1)
         self.Growth_yield = pd.concat([self.Growth_yield,GY_grid],axis=1,sort=False)
+
+
+    def export(self, base_path: Path | str) -> None:
+        """Export contents of the output object to a directory.
+
+        Exports each instance member of type pandas.DataFrame to a separate CSV file.
+        All pandas.Series members are combined in a DataFrame and written to 'series.csv'.
+        Similarly all scalar numerical members are grouped in 'scalars.csv'.
+        The 'Initialization' member is exported to a subfolder of the same name.
+        Members of any other type are skipped with a warning.
+
+        Parameters:
+          base_path : Path
+            A path that names the root directory where contents will be exported.
+            If the directory does not exist it will be created.
+        """
+        # Create space for output
+        base_path = Path(base_path)
+        base_path.mkdir(parents=True, exist_ok=True)
+
+        # Collect all series and scalar data; they are dumped at the end
+        series_data = dict()
+        scalar_numbers = dict()
+
+        for name, member in vars(self).items():
+            if isinstance(member, pd.DataFrame):
+                member.to_csv(base_path / f"{name}.csv")
+            elif isinstance(member, pd.Series):
+                series_data[name] = member
+            elif isinstance(member, numbers.Number):
+                scalar_numbers[name] = member
+            elif name == "Initialization":
+                # Special case - Initialization dictionary
+                # Serialise it to a subfolder
+                export_initialization_dict(base_path / name, member)
+            else:
+                warnings.warn(
+                    f"Output member '{name}' has unsupported type '{type(member)}'. "
+                    f"It has not been exported to the output directory '{base_path}'."
+                )
+
+        # If it happens that Series have different lengths they will be padded
+        # with missing data labels (NaNs). pd.concat raises ValueError on an
+        # empty collection, so fall back to an empty frame when no Series exist.
+        series_frame = pd.concat(series_data, axis=1) if series_data else pd.DataFrame()
+        series_frame.to_csv(base_path / "series.csv")
+
+        # Write all scalars to a single file
+        pd.Series(scalar_numbers).to_csv(base_path / "scalars.csv")