diff --git a/pyproject.toml b/pyproject.toml
index 7aa29841..c27c9100 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -114,6 +114,7 @@ plugins = "numpy.typing.mypy_plugin"
 [[tool.mypy.overrides]]
 module = [
     "scipy",
+    "scipy.io",
     "scipy.sparse",
     "scipy.sparse.linalg",
     "scipy.optimize",
diff --git a/pyttb/__init__.py b/pyttb/__init__.py
index 4492505b..a3418376 100644
--- a/pyttb/__init__.py
+++ b/pyttb/__init__.py
@@ -13,10 +13,10 @@
 
 from pyttb.cp_als import cp_als
 from pyttb.cp_apr import cp_apr
-from pyttb.export_data import export_data
+from pyttb.export_data import export_data, export_data_bin, export_data_mat
 from pyttb.gcp_opt import gcp_opt
 from pyttb.hosvd import hosvd
-from pyttb.import_data import import_data
+from pyttb.import_data import import_data, import_data_bin, import_data_mat
 from pyttb.khatrirao import khatrirao
 from pyttb.ktensor import ktensor
 from pyttb.matlab import matlab_support
@@ -42,9 +42,13 @@ def ignore_warnings(ignore=True):
     cp_als.__name__,
     cp_apr.__name__,
     export_data.__name__,
+    export_data_bin.__name__,
+    export_data_mat.__name__,
     gcp_opt.__name__,
     hosvd.__name__,
     import_data.__name__,
+    import_data_bin.__name__,
+    import_data_mat.__name__,
     khatrirao.__name__,
     ktensor.__name__,
     matlab_support.__name__,
diff --git a/pyttb/export_data.py b/pyttb/export_data.py
index 520e89a2..22f08bbd 100644
--- a/pyttb/export_data.py
+++ b/pyttb/export_data.py
@@ -6,14 +6,23 @@
 
 from __future__ import annotations
 
-from typing import TextIO
+from enum import Enum
+from typing import Any, TextIO
 
 import numpy as np
+from scipy.io import savemat
 
 import pyttb as ttb
 from pyttb.pyttb_utils import Shape, parse_shape
 
 
+class ExportFormat(Enum):
+    """Export format enumeration."""
+
+    NUMPY = "numpy"
+    MATLAB = "matlab"
+
+
 def export_data(
     data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray,
     filename: str,
@@ -56,6 +65,107 @@ def export_data(
             export_array(fp, data, fmt_data)
 
 
+def export_data_bin(
+    data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray,
+    filename: str,
+    index_base: int = 1,
+):
+    """Export tensor-related data to a binary file."""
+    _export_data_binary(data, filename, ExportFormat.NUMPY, index_base)
+
+
+def export_data_mat(
+    data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray,
+    filename: str,
+    index_base: int = 1,
+):
+    """Export tensor-related data to a MATLAB-compatible binary file."""
+    _export_data_binary(data, filename, ExportFormat.MATLAB, index_base)
+
+
+def _export_data_binary(
+    data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray,
+    filename: str,
+    export_format: ExportFormat,
+    index_base: int = 1,
+):
+    """Export tensor-related data to a binary file using specified format."""
+    if not isinstance(data, (ttb.tensor, ttb.sptensor, ttb.ktensor, np.ndarray)):
+        raise NotImplementedError(f"Invalid data type for export: {type(data)}")
+
+    # Prepare data for export based on type
+    if isinstance(data, ttb.tensor):
+        export_data_dict = _prepare_tensor_data(data)
+    elif isinstance(data, ttb.sptensor):
+        export_data_dict = _prepare_sptensor_data(data, index_base)
+    elif isinstance(data, ttb.ktensor):
+        export_data_dict = _prepare_ktensor_data(data)
+    elif isinstance(data, np.ndarray):
+        export_data_dict = _prepare_matrix_data(data)
+    else:
+        raise NotImplementedError(f"Unsupported data type: {type(data)}")
+
+    # Save using appropriate format
+    if export_format == ExportFormat.NUMPY:
+        with open(filename, "wb") as fp:
+            np.savez(fp, allow_pickle=False, **export_data_dict)
+    elif export_format == ExportFormat.MATLAB:
+        savemat(filename, export_data_dict)
+    else:
+        raise ValueError(f"Unsupported export format: {export_format}")
+
+
+def _create_header(data_type: str) -> np.ndarray:
+    """Create consistent header for tensor data."""
+    # TODO encode version information
+    return np.array([data_type, "F"])
+
+
+def _prepare_sptensor_data(data: ttb.sptensor, index_base: int = 1) -> dict[str, Any]:
+    """Prepare sparse tensor data for export."""
+    return {
+        "header": _create_header("sptensor"),
+        "shape": np.array(data.shape),
+        "nnz": np.array([data.nnz]),
+        "subs": data.subs + index_base,
+        "vals": data.vals,
+    }
+
+
+def _prepare_tensor_data(data: ttb.tensor) -> dict[str, Any]:
+    """Prepare dense tensor data for export."""
+    return {
+        "header": _create_header("tensor"),
+        "data": data.data,
+    }
+
+
+def _prepare_matrix_data(data: np.ndarray) -> dict[str, Any]:
+    """Prepare matrix data for export."""
+    return {
+        "header": _create_header("matrix"),
+        "data": data,
+    }
+
+
+def _prepare_ktensor_data(data: ttb.ktensor) -> dict[str, Any]:
+    """Prepare ktensor data for export."""
+    factor_matrices = data.factor_matrices
+    num_factor_matrices = len(factor_matrices)
+
+    export_dict = {
+        "header": _create_header("ktensor"),
+        "weights": data.weights,
+        "num_factor_matrices": num_factor_matrices,
+    }
+
+    # Add individual factor matrices for NumPy compatibility
+    for i in range(num_factor_matrices):
+        export_dict[f"factor_matrix_{i}"] = factor_matrices[i]
+
+    return export_dict
+
+
 def export_size(fp: TextIO, shape: Shape):
     """Export the size of something to a file."""
     shape = parse_shape(shape)
diff --git a/pyttb/import_data.py b/pyttb/import_data.py
index 7c73c26d..a9c5d7fe 100644
--- a/pyttb/import_data.py
+++ b/pyttb/import_data.py
@@ -10,8 +10,10 @@
 from typing import TextIO
 
 import numpy as np
+from scipy.io import loadmat
 
 import pyttb as ttb
+from pyttb.pyttb_utils import to_memory_order
 
 
 def import_data(
@@ -65,12 +67,118 @@ def import_data(
             fp.readline().strip()  # Skip factor type
             fac_shape = import_shape(fp)
             fac = import_array(fp, np.prod(fac_shape))
-            fac = np.reshape(fac, np.array(fac_shape))
+            fac = to_memory_order(np.reshape(fac, np.array(fac_shape)), order="F")
             factor_matrices.append(fac)
         return ttb.ktensor(factor_matrices, weights, copy=False)
     raise ValueError("Failed to load tensor data")  # pragma: no cover
 
 
+def import_data_bin(
+    filename: str,
+    index_base: int = 1,
+) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray:
+    """Import tensor-related data from a binary file."""
+
+    def load_bin_data(filename: str):
+        npzfile = np.load(filename, allow_pickle=False)
+        return {
+            "header": npzfile["header"][0],
+            "data": npzfile.get("data"),
+            "shape": tuple(npzfile["shape"]) if "shape" in npzfile else None,
+            "subs": npzfile.get("subs"),
+            "vals": npzfile.get("vals"),
+            "num_factor_matrices": int(npzfile["num_factor_matrices"])
+            if "num_factor_matrices" in npzfile
+            else None,
+            "factor_matrices": [
+                npzfile[f"factor_matrix_{i}"]
+                for i in range(int(npzfile["num_factor_matrices"]))
+            ]
+            if "num_factor_matrices" in npzfile
+            else None,
+            "weights": npzfile.get("weights"),
+        }
+
+    return _import_tensor_data(filename, index_base, load_bin_data)
+
+
+def import_data_mat(
+    filename: str,
+    index_base: int = 1,
+) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray:
+    """Import tensor-related data from a MATLAB file."""
+
+    def load_mat_data(filename: str):
+        mat_data = loadmat(filename)
+        header = mat_data["header"][0]
+        return {
+            "header": header.split()[0],
+            "data": mat_data.get("data"),
+            "shape": tuple(mat_data["shape"][0]) if "shape" in mat_data else None,
+            "subs": mat_data.get("subs"),
+            "vals": mat_data.get("vals"),
+            "num_factor_matrices": int(mat_data["num_factor_matrices"])
+            if "num_factor_matrices" in mat_data
+            else None,
+            "factor_matrices": [
+                mat_data[f"factor_matrix_{i}"]
+                for i in range(int(mat_data["num_factor_matrices"]))
+            ]
+            if "num_factor_matrices" in mat_data
+            else None,
+            "weights": mat_data.get("weights").flatten()
+            if "weights" in mat_data
+            else None,
+        }
+
+    return _import_tensor_data(filename, index_base, load_mat_data)
+
+
+def _import_tensor_data(
+    filename: str,
+    index_base: int,
+    data_loader,
+) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray:
+    """Generalized function to import tensor data from different file formats.
+
+    Parameters
+    ----------
+    filename:
+        File to import.
+    index_base:
+        Index basing allows interoperability (primarily between Python and MATLAB).
+    data_loader:
+        Function that loads and structures the data from the file.
+    """
+    # Check if file exists
+    if not os.path.isfile(filename):
+        raise FileNotFoundError(f"File path {filename} does not exist.")
+
+    loaded_data = data_loader(filename)
+    data_type = loaded_data["header"]
+
+    if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]:
+        raise ValueError(f"Invalid data type found: '{data_type}'")
+
+    if data_type == "tensor":
+        data = loaded_data["data"]
+        return ttb.tensor(data)
+    elif data_type == "sptensor":
+        shape = loaded_data["shape"]
+        subs = loaded_data["subs"] - index_base
+        vals = loaded_data["vals"]
+        return ttb.sptensor(subs, vals, shape)
+    elif data_type == "matrix":
+        data = loaded_data["data"]
+        return data
+    elif data_type == "ktensor":
+        factor_matrices = loaded_data["factor_matrices"]
+        weights = loaded_data["weights"]
+        return ttb.ktensor(factor_matrices, weights)
+
+    raise ValueError(f"Invalid data type found: {data_type}")
+
+
 def import_type(fp: TextIO) -> str:
     """Extract IO data type."""
     return fp.readline().strip().split(" ")[0]
diff --git a/tests/test_import_export_data.py b/tests/test_import_export_data.py
index 254bf75e..ebd78843 100644
--- a/tests/test_import_export_data.py
+++ b/tests/test_import_export_data.py
@@ -153,17 +153,30 @@ def test_import_invalid():
     assert "Imported dimensions are not of expected size" in str(excinfo)
 
 
-def test_export_data_tensor(sample_tensor):
+@pytest.mark.parametrize(
+    ["save_method", "import_method"],
+    [
+        (ttb.export_data, ttb.import_data),
+        (ttb.export_data_bin, ttb.import_data_bin),
+        (ttb.export_data_mat, ttb.import_data_mat),
+    ],
+)
+def test_export_data_tensor(sample_tensor, save_method, import_method):
     # truth data
     T = sample_tensor
 
     data_filename = os.path.join(os.path.dirname(__file__), "data", "tensor.out")
-    ttb.export_data(T, data_filename)
+    save_method(T, data_filename)
 
-    X = ttb.import_data(data_filename)
+    X = import_method(data_filename)
     assert T.isequal(X)
     os.unlink(data_filename)
 
+
+def test_export_data_tensor_format(sample_tensor):
+    # truth data
+    T = sample_tensor
+
     # index_base unspecified
     data_filename = os.path.join(os.path.dirname(__file__), "data", "tensor_int.out")
     ttb.export_data(T, data_filename, fmt_data="%d")
@@ -193,63 +206,93 @@ def test_export_data_tensor(sample_tensor):
     os.unlink(data_filename)
 
 
-def test_export_data_sptensor(sample_sptensor):
+@pytest.mark.parametrize(
+    ["save_method", "import_method"],
+    [
+        (ttb.export_data, ttb.import_data),
+        (ttb.export_data_bin, ttb.import_data_bin),
+        (ttb.export_data_mat, ttb.import_data_mat),
+    ],
+)
+def test_export_data_sptensor(sample_sptensor, save_method, import_method):
     # truth data
     S = sample_sptensor
 
     # imported data
     data_filename = os.path.join(os.path.dirname(__file__), "data", "sptensor.out")
-    ttb.export_data(S, data_filename)
+    save_method(S, data_filename)
 
-    X = ttb.import_data(data_filename)
+    X = import_method(data_filename)
     assert S.isequal(X)
     os.unlink(data_filename)
 
+
+def test_export_data_sptensor_fmt(sample_sptensor):
     data_filename = os.path.join(os.path.dirname(__file__), "data", "sptensor_int.out")
-    ttb.export_data(S, data_filename, fmt_data="%d")
+    ttb.export_data(sample_sptensor, data_filename, fmt_data="%d")
 
     X = ttb.import_data(data_filename)
-    assert S.isequal(X)
+    assert sample_sptensor.isequal(X)
     os.unlink(data_filename)
 
 
-def test_export_data_ktensor(sample_ktensor):
+@pytest.mark.parametrize(
+    ["save_method", "import_method"],
+    [
+        (ttb.export_data, ttb.import_data),
+        (ttb.export_data_bin, ttb.import_data_bin),
+        (ttb.export_data_mat, ttb.import_data_mat),
+    ],
+)
+def test_export_data_ktensor(sample_ktensor, save_method, import_method):
     # truth data
     K = sample_ktensor
 
     # imported data
     data_filename = os.path.join(os.path.dirname(__file__), "data", "ktensor.out")
-    ttb.export_data(K, data_filename)
+    save_method(K, data_filename)
 
-    X = ttb.import_data(data_filename)
+    X = import_method(data_filename)
     assert K.isequal(X)
     os.unlink(data_filename)
 
+
+def test_export_data_ktensor_format(sample_ktensor):
     data_filename = os.path.join(os.path.dirname(__file__), "data", "ktensor_int.out")
-    ttb.export_data(K, data_filename, fmt_data="%d", fmt_weights="%d")
+    ttb.export_data(sample_ktensor, data_filename, fmt_data="%d", fmt_weights="%d")
 
     X = ttb.import_data(data_filename)
-    assert K.isequal(X)
+    assert sample_ktensor.isequal(X)
     os.unlink(data_filename)
 
 
-def test_export_data_array(sample_array):
+@pytest.mark.parametrize(
+    ["save_method", "import_method"],
+    [
+        (ttb.export_data, ttb.import_data),
+        (ttb.export_data_bin, ttb.import_data_bin),
+        (ttb.export_data_mat, ttb.import_data_mat),
+    ],
+)
+def test_export_data_array(sample_array, save_method, import_method):
     # truth data
     M = sample_array
 
     # imported data
     data_filename = os.path.join(os.path.dirname(__file__), "data", "matrix.out")
-    ttb.export_data(M, data_filename)
+    save_method(M, data_filename)
 
-    X = ttb.import_data(data_filename)
+    X = import_method(data_filename)
     assert np.array_equal(M, X)
     os.unlink(data_filename)
 
+
+def test_export_data_array_format(sample_array):
     data_filename = os.path.join(os.path.dirname(__file__), "data", "matrix_int.out")
-    ttb.export_data(M, data_filename, fmt_data="%d")
+    ttb.export_data(sample_array, data_filename, fmt_data="%d")
 
     X = ttb.import_data(data_filename)
-    assert np.array_equal(M, X)
+    assert np.array_equal(sample_array, X)
     os.unlink(data_filename)
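For reference (not part of the patch): a minimal round-trip sketch of the new helpers introduced above, assuming the patch is applied and scipy is installed. The sample tensor values and the file names used here are illustrative only.

import numpy as np
import pyttb as ttb

# Small sparse tensor to round-trip (arbitrary sample data).
subs = np.array([[0, 0, 0], [1, 2, 3]])
vals = np.array([[10.5], [1.5]])
S = ttb.sptensor(subs, vals, shape=(4, 4, 4))

# NumPy .npz round trip via the new binary helpers.
ttb.export_data_bin(S, "sptensor.npz")
assert S.isequal(ttb.import_data_bin("sptensor.npz"))

# MATLAB-compatible .mat round trip (also loadable from MATLAB).
ttb.export_data_mat(S, "sptensor.mat")
assert S.isequal(ttb.import_data_mat("sptensor.mat"))

The same pattern applies to tensor, ktensor, and plain numpy.ndarray inputs, as exercised by the parametrized tests in this patch.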