From bc1be200f546b9f35bfa806b52b0cec522b60233 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:36:47 -0500 Subject: [PATCH 1/4] Add binary options for sptensor --- pyproject.toml | 1 + pyttb/__init__.py | 8 +++- pyttb/export_data.py | 76 +++++++++++++++++++++++++++++++- pyttb/import_data.py | 50 ++++++++++++++++++++- tests/test_import_export_data.py | 20 ++++++--- 5 files changed, 146 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7aa29841..c27c9100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ plugins = "numpy.typing.mypy_plugin" [[tool.mypy.overrides]] module = [ "scipy", + "scipy.io", "scipy.sparse", "scipy.sparse.linalg", "scipy.optimize", diff --git a/pyttb/__init__.py b/pyttb/__init__.py index 4492505b..a3418376 100644 --- a/pyttb/__init__.py +++ b/pyttb/__init__.py @@ -13,10 +13,10 @@ from pyttb.cp_als import cp_als from pyttb.cp_apr import cp_apr -from pyttb.export_data import export_data +from pyttb.export_data import export_data, export_data_bin, export_data_mat from pyttb.gcp_opt import gcp_opt from pyttb.hosvd import hosvd -from pyttb.import_data import import_data +from pyttb.import_data import import_data, import_data_bin, import_data_mat from pyttb.khatrirao import khatrirao from pyttb.ktensor import ktensor from pyttb.matlab import matlab_support @@ -42,9 +42,13 @@ def ignore_warnings(ignore=True): cp_als.__name__, cp_apr.__name__, export_data.__name__, + export_data_bin.__name__, + export_data_mat.__name__, gcp_opt.__name__, hosvd.__name__, import_data.__name__, + import_data_bin.__name__, + import_data_mat.__name__, khatrirao.__name__, ktensor.__name__, matlab_support.__name__, diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 520e89a2..7ab6563a 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -6,13 +6,17 @@ from __future__ import annotations -from typing import TextIO +from typing import TYPE_CHECKING, TextIO import numpy as np +from scipy.io import savemat import pyttb as ttb from pyttb.pyttb_utils import Shape, parse_shape +if TYPE_CHECKING: + from io import BufferedWriter + def export_data( data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray, @@ -56,6 +60,76 @@ def export_data( export_array(fp, data, fmt_data) +def export_data_bin( + data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray, + filename: str, + index_base: int = 1, +): + """Export tensor-related data to a binary file.""" + if not isinstance(data, (ttb.tensor, ttb.sptensor, ttb.ktensor, np.ndarray)): + raise NotImplementedError(f"Invalid data type for export: {type(data)}") + + with open(filename, "wb") as fp: + if isinstance(data, ttb.tensor): + raise NotImplementedError( + "Binary export not implemented for dense tensors." + ) + elif isinstance(data, ttb.sptensor): + _export_sptensor_bin(fp, data, index_base) + elif isinstance(data, ttb.ktensor): + raise NotImplementedError("Binary export not implemented for ktensors.") + elif isinstance(data, np.ndarray): + raise NotImplementedError("Binary export not implemented for dense arrays.") + + +def export_data_mat( + data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray, + filename: str, + index_base: int = 1, +): + """Export tensor-related data to a matlab compatible binary file.""" + if not isinstance(data, (ttb.tensor, ttb.sptensor, ttb.ktensor, np.ndarray)): + raise NotImplementedError(f"Invalid data type for export: {type(data)}") + + if isinstance(data, ttb.tensor): + raise NotImplementedError("Binary export not implemented for dense tensors.") + elif isinstance(data, ttb.sptensor): + _export_sptensor_mat(filename, data, index_base) + elif isinstance(data, ttb.ktensor): + raise NotImplementedError("Binary export not implemented for ktensors.") + elif isinstance(data, np.ndarray): + raise NotImplementedError("Binary export not implemented for dense arrays.") + + +def _export_sptensor_bin(fp: BufferedWriter, data: ttb.sptensor, index_base: int = 1): + """Export sparse array data in coordinate format using NumPy.""" + # TODO add utility for consistent header creation + header = np.array(["sptensor", "F"]) + shape = np.array(data.shape) + nnz = np.array([data.nnz]) + subs = data.subs + index_base + vals = data.vals + np.savez( + fp, + allow_pickle=False, + header=header, + shape=shape, + nnz=nnz, + subs=subs, + vals=vals, + ) + + +def _export_sptensor_mat(filename: str, data: ttb.sptensor, index_base: int = 1): + """Export sparse array data in coordinate format using savemat.""" + header = np.array(["sptensor", "F"]) + shape = np.array(data.shape) + nnz = np.array([data.nnz]) + subs = data.subs + index_base + vals = data.vals + savemat(filename, dict(header=header, shape=shape, nnz=nnz, subs=subs, vals=vals)) + + def export_size(fp: TextIO, shape: Shape): """Export the size of something to a file.""" shape = parse_shape(shape) diff --git a/pyttb/import_data.py b/pyttb/import_data.py index 7c73c26d..9bda4c22 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -10,8 +10,10 @@ from typing import TextIO import numpy as np +from scipy.io import loadmat import pyttb as ttb +from pyttb.pyttb_utils import to_memory_order def import_data( @@ -65,12 +67,58 @@ def import_data( fp.readline().strip() # Skip factor type fac_shape = import_shape(fp) fac = import_array(fp, np.prod(fac_shape)) - fac = np.reshape(fac, np.array(fac_shape)) + fac = to_memory_order(np.reshape(fac, np.array(fac_shape)), order="F") factor_matrices.append(fac) return ttb.ktensor(factor_matrices, weights, copy=False) raise ValueError("Failed to load tensor data") # pragma: no cover +def import_data_bin( + filename: str, + index_base: int = 1, +): + """Import tensor-related data from a binary file.""" + # Check if file exists + if not os.path.isfile(filename): + raise FileNotFoundError(f"File path {filename} does not exist.") + + npzfile = np.load(filename, allow_pickle=False) + header = npzfile["header"] + data_type = header[0] + + if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: + raise ValueError(f"Invalid data type found: {data_type}") + if data_type == "sptensor": + shape = tuple(npzfile["shape"]) + subs = npzfile["subs"] - index_base + vals = npzfile["vals"] + A = ttb.sptensor(subs, vals, shape) + return A + + +def import_data_mat( + filename: str, + index_base: int = 1, +): + """Import tensor-related data from a binary file.""" + # Check if file exists + if not os.path.isfile(filename): + raise FileNotFoundError(f"File path {filename} does not exist.") + + mat_data = loadmat(filename) + header = mat_data["header"][0] + data_type = header.split()[0] + + if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: + raise ValueError(f"Invalid data type found: {data_type}") + if data_type == "sptensor": + shape = tuple(mat_data["shape"][0]) + subs = mat_data["subs"] - index_base + vals = mat_data["vals"] + A = ttb.sptensor(subs, vals, shape) + return A + + def import_type(fp: TextIO) -> str: """Extract IO data type.""" return fp.readline().strip().split(" ")[0] diff --git a/tests/test_import_export_data.py b/tests/test_import_export_data.py index 254bf75e..7a5bd4c3 100644 --- a/tests/test_import_export_data.py +++ b/tests/test_import_export_data.py @@ -193,23 +193,33 @@ def test_export_data_tensor(sample_tensor): os.unlink(data_filename) -def test_export_data_sptensor(sample_sptensor): +@pytest.mark.parametrize( + ["save_method", "import_method"], + [ + (ttb.export_data, ttb.import_data), + (ttb.export_data_bin, ttb.import_data_bin), + (ttb.export_data_mat, ttb.import_data_mat), + ], +) +def test_export_data_sptensor(sample_sptensor, save_method, import_method): # truth data S = sample_sptensor # imported data data_filename = os.path.join(os.path.dirname(__file__), "data", "sptensor.out") - ttb.export_data(S, data_filename) + save_method(S, data_filename) - X = ttb.import_data(data_filename) + X = import_method(data_filename) assert S.isequal(X) os.unlink(data_filename) + +def test_export_data_sptensor_fmt(sample_sptensor): data_filename = os.path.join(os.path.dirname(__file__), "data", "sptensor_int.out") - ttb.export_data(S, data_filename, fmt_data="%d") + ttb.export_data(sample_sptensor, data_filename, fmt_data="%d") X = ttb.import_data(data_filename) - assert S.isequal(X) + assert sample_sptensor.isequal(X) os.unlink(data_filename) From eacd4844ea59aacfe2b351415dc04771bbc5a211 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:40:36 -0500 Subject: [PATCH 2/4] Add dense tensor and array --- pyttb/export_data.py | 50 ++++++++++++++++++++++++++++---- pyttb/import_data.py | 28 +++++++++++++----- tests/test_import_export_data.py | 39 ++++++++++++++++++++----- 3 files changed, 95 insertions(+), 22 deletions(-) diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 7ab6563a..397c8318 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -71,15 +71,13 @@ def export_data_bin( with open(filename, "wb") as fp: if isinstance(data, ttb.tensor): - raise NotImplementedError( - "Binary export not implemented for dense tensors." - ) + _export_tensor_bin(fp, data) elif isinstance(data, ttb.sptensor): _export_sptensor_bin(fp, data, index_base) elif isinstance(data, ttb.ktensor): raise NotImplementedError("Binary export not implemented for ktensors.") elif isinstance(data, np.ndarray): - raise NotImplementedError("Binary export not implemented for dense arrays.") + _export_matrix_bin(fp, data) def export_data_mat( @@ -92,13 +90,13 @@ def export_data_mat( raise NotImplementedError(f"Invalid data type for export: {type(data)}") if isinstance(data, ttb.tensor): - raise NotImplementedError("Binary export not implemented for dense tensors.") + _export_tensor_mat(filename, data) elif isinstance(data, ttb.sptensor): _export_sptensor_mat(filename, data, index_base) elif isinstance(data, ttb.ktensor): raise NotImplementedError("Binary export not implemented for ktensors.") elif isinstance(data, np.ndarray): - raise NotImplementedError("Binary export not implemented for dense arrays.") + _export_matrix_mat(filename, data) def _export_sptensor_bin(fp: BufferedWriter, data: ttb.sptensor, index_base: int = 1): @@ -120,6 +118,32 @@ def _export_sptensor_bin(fp: BufferedWriter, data: ttb.sptensor, index_base: int ) +def _export_tensor_bin(fp: BufferedWriter, data: ttb.tensor): + """Export dense tensor using NumPy.""" + # TODO add utility for consistent header creation + header = np.array(["tensor", "F"]) + internal_data = data.data + np.savez( + fp, + allow_pickle=False, + header=header, + data=internal_data, + ) + + +def _export_matrix_bin(fp: BufferedWriter, data: np.ndarray): + """Export dense matrix using NumPy.""" + # TODO add utility for consistent header creation + header = np.array(["matrix", "F"]) + internal_data = data + np.savez( + fp, + allow_pickle=False, + header=header, + data=internal_data, + ) + + def _export_sptensor_mat(filename: str, data: ttb.sptensor, index_base: int = 1): """Export sparse array data in coordinate format using savemat.""" header = np.array(["sptensor", "F"]) @@ -130,6 +154,20 @@ def _export_sptensor_mat(filename: str, data: ttb.sptensor, index_base: int = 1) savemat(filename, dict(header=header, shape=shape, nnz=nnz, subs=subs, vals=vals)) +def _export_tensor_mat(filename: str, data: ttb.tensor): + """Export dense tensor data using savemat.""" + header = np.array(["tensor", "F"]) + internal_data = data.data + savemat(filename, dict(header=header, data=internal_data)) + + +def _export_matrix_mat(filename: str, data: np.ndarray): + """Export dense tensor data using savemat.""" + header = np.array(["matrix", "F"]) + internal_data = data + savemat(filename, dict(header=header, data=internal_data)) + + def export_size(fp: TextIO, shape: Shape): """Export the size of something to a file.""" shape = parse_shape(shape) diff --git a/pyttb/import_data.py b/pyttb/import_data.py index 9bda4c22..b032b40d 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -76,7 +76,7 @@ def import_data( def import_data_bin( filename: str, index_base: int = 1, -): +) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray: """Import tensor-related data from a binary file.""" # Check if file exists if not os.path.isfile(filename): @@ -88,18 +88,24 @@ def import_data_bin( if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: raise ValueError(f"Invalid data type found: {data_type}") - if data_type == "sptensor": + if data_type == "tensor": + data = npzfile["data"] + return ttb.tensor(data) + elif data_type == "sptensor": shape = tuple(npzfile["shape"]) subs = npzfile["subs"] - index_base vals = npzfile["vals"] - A = ttb.sptensor(subs, vals, shape) - return A + return ttb.sptensor(subs, vals, shape) + elif data_type == "matrix": + data = npzfile["data"] + return data + raise ValueError(f"Invalid data type found: {data_type}") def import_data_mat( filename: str, index_base: int = 1, -): +) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray: """Import tensor-related data from a binary file.""" # Check if file exists if not os.path.isfile(filename): @@ -111,12 +117,18 @@ def import_data_mat( if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: raise ValueError(f"Invalid data type found: {data_type}") - if data_type == "sptensor": + if data_type == "tensor": + data = mat_data["data"] + return ttb.tensor(data) + elif data_type == "sptensor": shape = tuple(mat_data["shape"][0]) subs = mat_data["subs"] - index_base vals = mat_data["vals"] - A = ttb.sptensor(subs, vals, shape) - return A + return ttb.sptensor(subs, vals, shape) + elif data_type == "matrix": + data = mat_data["data"] + return data + raise ValueError(f"Invalid data type found: {data_type}") def import_type(fp: TextIO) -> str: diff --git a/tests/test_import_export_data.py b/tests/test_import_export_data.py index 7a5bd4c3..afd6b22f 100644 --- a/tests/test_import_export_data.py +++ b/tests/test_import_export_data.py @@ -153,17 +153,30 @@ def test_import_invalid(): assert "Imported dimensions are not of expected size" in str(excinfo) -def test_export_data_tensor(sample_tensor): +@pytest.mark.parametrize( + ["save_method", "import_method"], + [ + (ttb.export_data, ttb.import_data), + (ttb.export_data_bin, ttb.import_data_bin), + (ttb.export_data_mat, ttb.import_data_mat), + ], +) +def test_export_data_tensor(sample_tensor, save_method, import_method): # truth data T = sample_tensor data_filename = os.path.join(os.path.dirname(__file__), "data", "tensor.out") - ttb.export_data(T, data_filename) + save_method(T, data_filename) - X = ttb.import_data(data_filename) + X = import_method(data_filename) assert T.isequal(X) os.unlink(data_filename) + +def test_export_data_tensor_format(sample_tensor): + # truth data + T = sample_tensor + # index_base unspecified data_filename = os.path.join(os.path.dirname(__file__), "data", "tensor_int.out") ttb.export_data(T, data_filename, fmt_data="%d") @@ -243,23 +256,33 @@ def test_export_data_ktensor(sample_ktensor): os.unlink(data_filename) -def test_export_data_array(sample_array): +@pytest.mark.parametrize( + ["save_method", "import_method"], + [ + (ttb.export_data, ttb.import_data), + (ttb.export_data_bin, ttb.import_data_bin), + (ttb.export_data_mat, ttb.import_data_mat), + ], +) +def test_export_data_array(sample_array, save_method, import_method): # truth data M = sample_array # imported data data_filename = os.path.join(os.path.dirname(__file__), "data", "matrix.out") - ttb.export_data(M, data_filename) + save_method(M, data_filename) - X = ttb.import_data(data_filename) + X = import_method(data_filename) assert np.array_equal(M, X) os.unlink(data_filename) + +def test_export_data_array_format(sample_array): data_filename = os.path.join(os.path.dirname(__file__), "data", "matrix_int.out") - ttb.export_data(M, data_filename, fmt_data="%d") + ttb.export_data(sample_array, data_filename, fmt_data="%d") X = ttb.import_data(data_filename) - assert np.array_equal(M, X) + assert np.array_equal(sample_array, X) os.unlink(data_filename) From 265b9198b9087b69c659ee922bcb190902788c93 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Fri, 14 Nov 2025 14:52:03 -0500 Subject: [PATCH 3/4] Add ktensor support --- pyttb/export_data.py | 36 +++++++++++++++++++++++++++++--- pyttb/import_data.py | 16 +++++++++++++- tests/test_import_export_data.py | 20 +++++++++++++----- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 397c8318..2d871834 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -75,7 +75,7 @@ def export_data_bin( elif isinstance(data, ttb.sptensor): _export_sptensor_bin(fp, data, index_base) elif isinstance(data, ttb.ktensor): - raise NotImplementedError("Binary export not implemented for ktensors.") + _export_ktensor_bin(fp, data) elif isinstance(data, np.ndarray): _export_matrix_bin(fp, data) @@ -94,7 +94,7 @@ def export_data_mat( elif isinstance(data, ttb.sptensor): _export_sptensor_mat(filename, data, index_base) elif isinstance(data, ttb.ktensor): - raise NotImplementedError("Binary export not implemented for ktensors.") + _export_ktensor_mat(filename, data) elif isinstance(data, np.ndarray): _export_matrix_mat(filename, data) @@ -144,6 +144,26 @@ def _export_matrix_bin(fp: BufferedWriter, data: np.ndarray): ) +def _export_ktensor_bin(fp: BufferedWriter, data: ttb.ktensor): + """Export ktensor using NumPy.""" + # TODO add utility for consistent header creation + header = np.array(["ktensor", "F"]) + factor_matrices = data.factor_matrices + num_factor_matrices = len(factor_matrices) + all_factor_matrices = { + f"factor_matrix_{i}": factor_matrices[i] for i in range(num_factor_matrices) + } + weights = data.weights + np.savez( + fp, + allow_pickle=False, + header=header, + num_factor_matrices=num_factor_matrices, + weights=weights, + **all_factor_matrices, + ) + + def _export_sptensor_mat(filename: str, data: ttb.sptensor, index_base: int = 1): """Export sparse array data in coordinate format using savemat.""" header = np.array(["sptensor", "F"]) @@ -162,12 +182,22 @@ def _export_tensor_mat(filename: str, data: ttb.tensor): def _export_matrix_mat(filename: str, data: np.ndarray): - """Export dense tensor data using savemat.""" + """Export dense matrix data using savemat.""" header = np.array(["matrix", "F"]) internal_data = data savemat(filename, dict(header=header, data=internal_data)) +def _export_ktensor_mat(filename: str, data: ttb.ktensor): + """Export ktensor data using savemat.""" + header = np.array(["ktensor", "F"]) + factor_matrices = data.factor_matrices + weights = data.weights + savemat( + filename, dict(header=header, factor_matrices=factor_matrices, weights=weights) + ) + + def export_size(fp: TextIO, shape: Shape): """Export the size of something to a file.""" shape = parse_shape(shape) diff --git a/pyttb/import_data.py b/pyttb/import_data.py index b032b40d..b95de47e 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -87,7 +87,7 @@ def import_data_bin( data_type = header[0] if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: - raise ValueError(f"Invalid data type found: {data_type}") + raise ValueError(f"Invalid data type found: '{data_type}'") if data_type == "tensor": data = npzfile["data"] return ttb.tensor(data) @@ -99,6 +99,13 @@ def import_data_bin( elif data_type == "matrix": data = npzfile["data"] return data + elif data_type == "ktensor": + num_factor_matrices = int(npzfile["num_factor_matrices"]) + factor_matrices = [ + npzfile[f"factor_matrix_{i}"] for i in range(num_factor_matrices) + ] + weights = npzfile["weights"] + return ttb.ktensor(factor_matrices, weights) raise ValueError(f"Invalid data type found: {data_type}") @@ -128,6 +135,13 @@ def import_data_mat( elif data_type == "matrix": data = mat_data["data"] return data + elif data_type == "ktensor": + factor_matrices = [ + mat_data["factor_matrices"][0, n] + for n in range(mat_data["factor_matrices"].shape[1]) + ] + weights = mat_data["weights"].flatten() + return ttb.ktensor(factor_matrices, weights) raise ValueError(f"Invalid data type found: {data_type}") diff --git a/tests/test_import_export_data.py b/tests/test_import_export_data.py index afd6b22f..ebd78843 100644 --- a/tests/test_import_export_data.py +++ b/tests/test_import_export_data.py @@ -236,23 +236,33 @@ def test_export_data_sptensor_fmt(sample_sptensor): os.unlink(data_filename) -def test_export_data_ktensor(sample_ktensor): +@pytest.mark.parametrize( + ["save_method", "import_method"], + [ + (ttb.export_data, ttb.import_data), + (ttb.export_data_bin, ttb.import_data_bin), + (ttb.export_data_mat, ttb.import_data_mat), + ], +) +def test_export_data_ktensor(sample_ktensor, save_method, import_method): # truth data K = sample_ktensor # imported data data_filename = os.path.join(os.path.dirname(__file__), "data", "ktensor.out") - ttb.export_data(K, data_filename) + save_method(K, data_filename) - X = ttb.import_data(data_filename) + X = import_method(data_filename) assert K.isequal(X) os.unlink(data_filename) + +def test_export_data_ktensor_format(sample_ktensor): data_filename = os.path.join(os.path.dirname(__file__), "data", "ktensor_int.out") - ttb.export_data(K, data_filename, fmt_data="%d", fmt_weights="%d") + ttb.export_data(sample_ktensor, data_filename, fmt_data="%d", fmt_weights="%d") X = ttb.import_data(data_filename) - assert K.isequal(X) + assert sample_ktensor.isequal(X) os.unlink(data_filename) From 9597111e7b5766e240c6eeeff74b1f4422a36787 Mon Sep 17 00:00:00 2001 From: Nick Johnson <24689722+ntjohnson1@users.noreply.github.com> Date: Fri, 14 Nov 2025 15:05:48 -0500 Subject: [PATCH 4/4] Gather some common features, and support older deps --- pyttb/export_data.py | 198 ++++++++++++++++++------------------------- pyttb/import_data.py | 120 ++++++++++++++++---------- 2 files changed, 160 insertions(+), 158 deletions(-) diff --git a/pyttb/export_data.py b/pyttb/export_data.py index 2d871834..22f08bbd 100644 --- a/pyttb/export_data.py +++ b/pyttb/export_data.py @@ -6,7 +6,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TextIO +from enum import Enum +from typing import Any, TextIO import numpy as np from scipy.io import savemat @@ -14,8 +15,12 @@ import pyttb as ttb from pyttb.pyttb_utils import Shape, parse_shape -if TYPE_CHECKING: - from io import BufferedWriter + +class ExportFormat(Enum): + """Export format enumeration.""" + + NUMPY = "numpy" + MATLAB = "matlab" def export_data( @@ -66,18 +71,7 @@ def export_data_bin( index_base: int = 1, ): """Export tensor-related data to a binary file.""" - if not isinstance(data, (ttb.tensor, ttb.sptensor, ttb.ktensor, np.ndarray)): - raise NotImplementedError(f"Invalid data type for export: {type(data)}") - - with open(filename, "wb") as fp: - if isinstance(data, ttb.tensor): - _export_tensor_bin(fp, data) - elif isinstance(data, ttb.sptensor): - _export_sptensor_bin(fp, data, index_base) - elif isinstance(data, ttb.ktensor): - _export_ktensor_bin(fp, data) - elif isinstance(data, np.ndarray): - _export_matrix_bin(fp, data) + _export_data_binary(data, filename, ExportFormat.NUMPY, index_base) def export_data_mat( @@ -86,116 +80,90 @@ def export_data_mat( index_base: int = 1, ): """Export tensor-related data to a matlab compatible binary file.""" + _export_data_binary(data, filename, ExportFormat.MATLAB, index_base) + + +def _export_data_binary( + data: ttb.tensor | ttb.ktensor | ttb.sptensor | np.ndarray, + filename: str, + export_format: ExportFormat, + index_base: int = 1, +): + """Export tensor-related data to a binary file using specified format.""" if not isinstance(data, (ttb.tensor, ttb.sptensor, ttb.ktensor, np.ndarray)): raise NotImplementedError(f"Invalid data type for export: {type(data)}") + # Prepare data for export based on type if isinstance(data, ttb.tensor): - _export_tensor_mat(filename, data) + export_data_dict = _prepare_tensor_data(data) elif isinstance(data, ttb.sptensor): - _export_sptensor_mat(filename, data, index_base) + export_data_dict = _prepare_sptensor_data(data, index_base) elif isinstance(data, ttb.ktensor): - _export_ktensor_mat(filename, data) + export_data_dict = _prepare_ktensor_data(data) elif isinstance(data, np.ndarray): - _export_matrix_mat(filename, data) - - -def _export_sptensor_bin(fp: BufferedWriter, data: ttb.sptensor, index_base: int = 1): - """Export sparse array data in coordinate format using NumPy.""" - # TODO add utility for consistent header creation - header = np.array(["sptensor", "F"]) - shape = np.array(data.shape) - nnz = np.array([data.nnz]) - subs = data.subs + index_base - vals = data.vals - np.savez( - fp, - allow_pickle=False, - header=header, - shape=shape, - nnz=nnz, - subs=subs, - vals=vals, - ) - - -def _export_tensor_bin(fp: BufferedWriter, data: ttb.tensor): - """Export dense tensor using NumPy.""" - # TODO add utility for consistent header creation - header = np.array(["tensor", "F"]) - internal_data = data.data - np.savez( - fp, - allow_pickle=False, - header=header, - data=internal_data, - ) - - -def _export_matrix_bin(fp: BufferedWriter, data: np.ndarray): - """Export dense matrix using NumPy.""" - # TODO add utility for consistent header creation - header = np.array(["matrix", "F"]) - internal_data = data - np.savez( - fp, - allow_pickle=False, - header=header, - data=internal_data, - ) - - -def _export_ktensor_bin(fp: BufferedWriter, data: ttb.ktensor): - """Export ktensor using NumPy.""" - # TODO add utility for consistent header creation - header = np.array(["ktensor", "F"]) + export_data_dict = _prepare_matrix_data(data) + else: + raise NotImplementedError(f"Unsupported data type: {type(data)}") + + # Save using appropriate format + if export_format == ExportFormat.NUMPY: + with open(filename, "wb") as fp: + np.savez(fp, allow_pickle=False, **export_data_dict) + elif export_format == ExportFormat.MATLAB: + savemat(filename, export_data_dict) + else: + raise ValueError(f"Unsupported export format: {export_format}") + + +def _create_header(data_type: str) -> np.ndarray: + """Create consistent header for tensor data.""" + # TODO encode version information + return np.array([data_type, "F"]) + + +def _prepare_sptensor_data(data: ttb.sptensor, index_base: int = 1) -> dict[str, Any]: + """Prepare sparse tensor data for export.""" + return { + "header": _create_header("sptensor"), + "shape": np.array(data.shape), + "nnz": np.array([data.nnz]), + "subs": data.subs + index_base, + "vals": data.vals, + } + + +def _prepare_tensor_data(data: ttb.tensor) -> dict[str, Any]: + """Prepare dense tensor data for export.""" + return { + "header": _create_header("tensor"), + "data": data.data, + } + + +def _prepare_matrix_data(data: np.ndarray) -> dict[str, Any]: + """Prepare matrix data for export.""" + return { + "header": _create_header("matrix"), + "data": data, + } + + +def _prepare_ktensor_data(data: ttb.ktensor) -> dict[str, Any]: + """Prepare ktensor data for export.""" factor_matrices = data.factor_matrices num_factor_matrices = len(factor_matrices) - all_factor_matrices = { - f"factor_matrix_{i}": factor_matrices[i] for i in range(num_factor_matrices) + + export_dict = { + "header": _create_header("ktensor"), + "weights": data.weights, + "num_factor_matrices": num_factor_matrices, } - weights = data.weights - np.savez( - fp, - allow_pickle=False, - header=header, - num_factor_matrices=num_factor_matrices, - weights=weights, - **all_factor_matrices, - ) - - -def _export_sptensor_mat(filename: str, data: ttb.sptensor, index_base: int = 1): - """Export sparse array data in coordinate format using savemat.""" - header = np.array(["sptensor", "F"]) - shape = np.array(data.shape) - nnz = np.array([data.nnz]) - subs = data.subs + index_base - vals = data.vals - savemat(filename, dict(header=header, shape=shape, nnz=nnz, subs=subs, vals=vals)) - - -def _export_tensor_mat(filename: str, data: ttb.tensor): - """Export dense tensor data using savemat.""" - header = np.array(["tensor", "F"]) - internal_data = data.data - savemat(filename, dict(header=header, data=internal_data)) - - -def _export_matrix_mat(filename: str, data: np.ndarray): - """Export dense matrix data using savemat.""" - header = np.array(["matrix", "F"]) - internal_data = data - savemat(filename, dict(header=header, data=internal_data)) - - -def _export_ktensor_mat(filename: str, data: ttb.ktensor): - """Export ktensor data using savemat.""" - header = np.array(["ktensor", "F"]) - factor_matrices = data.factor_matrices - weights = data.weights - savemat( - filename, dict(header=header, factor_matrices=factor_matrices, weights=weights) - ) + + # Add individual factor matrices for NumPy compatibility + for i in range(num_factor_matrices): + export_dict[f"factor_matrix_{i}"] = factor_matrices[i] + + return export_dict def export_size(fp: TextIO, shape: Shape): diff --git a/pyttb/import_data.py b/pyttb/import_data.py index b95de47e..a9c5d7fe 100644 --- a/pyttb/import_data.py +++ b/pyttb/import_data.py @@ -78,70 +78,104 @@ def import_data_bin( index_base: int = 1, ) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray: """Import tensor-related data from a binary file.""" - # Check if file exists - if not os.path.isfile(filename): - raise FileNotFoundError(f"File path {filename} does not exist.") - - npzfile = np.load(filename, allow_pickle=False) - header = npzfile["header"] - data_type = header[0] - if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: - raise ValueError(f"Invalid data type found: '{data_type}'") - if data_type == "tensor": - data = npzfile["data"] - return ttb.tensor(data) - elif data_type == "sptensor": - shape = tuple(npzfile["shape"]) - subs = npzfile["subs"] - index_base - vals = npzfile["vals"] - return ttb.sptensor(subs, vals, shape) - elif data_type == "matrix": - data = npzfile["data"] - return data - elif data_type == "ktensor": - num_factor_matrices = int(npzfile["num_factor_matrices"]) - factor_matrices = [ - npzfile[f"factor_matrix_{i}"] for i in range(num_factor_matrices) - ] - weights = npzfile["weights"] - return ttb.ktensor(factor_matrices, weights) - raise ValueError(f"Invalid data type found: {data_type}") + def load_bin_data(filename: str): + npzfile = np.load(filename, allow_pickle=False) + return { + "header": npzfile["header"][0], + "data": npzfile.get("data"), + "shape": tuple(npzfile["shape"]) if "shape" in npzfile else None, + "subs": npzfile.get("subs"), + "vals": npzfile.get("vals"), + "num_factor_matrices": int(npzfile["num_factor_matrices"]) + if "num_factor_matrices" in npzfile + else None, + "factor_matrices": [ + npzfile[f"factor_matrix_{i}"] + for i in range(int(npzfile["num_factor_matrices"])) + ] + if "num_factor_matrices" in npzfile + else None, + "weights": npzfile.get("weights"), + } + + return _import_tensor_data(filename, index_base, load_bin_data) def import_data_mat( filename: str, index_base: int = 1, ) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray: - """Import tensor-related data from a binary file.""" + """Import tensor-related data from a MATLAB file.""" + + def load_mat_data(filename: str): + mat_data = loadmat(filename) + header = mat_data["header"][0] + return { + "header": header.split()[0], + "data": mat_data.get("data"), + "shape": tuple(mat_data["shape"][0]) if "shape" in mat_data else None, + "subs": mat_data.get("subs"), + "vals": mat_data.get("vals"), + "num_factor_matrices": int(mat_data["num_factor_matrices"]) + if "num_factor_matrices" in mat_data + else None, + "factor_matrices": [ + mat_data[f"factor_matrix_{i}"] + for i in range(int(mat_data["num_factor_matrices"])) + ] + if "num_factor_matrices" in mat_data + else None, + "weights": mat_data.get("weights").flatten() + if "weights" in mat_data + else None, + } + + return _import_tensor_data(filename, index_base, load_mat_data) + + +def _import_tensor_data( + filename: str, + index_base: int, + data_loader, +) -> ttb.sptensor | ttb.ktensor | ttb.tensor | np.ndarray: + """Generalized function to import tensor data from different file formats. + + Parameters + ---------- + filename: + File to import. + index_base: + Index basing allows interoperability (Primarily between python and MATLAB). + data_loader: + Function that loads and structures the data from the file. + """ # Check if file exists if not os.path.isfile(filename): raise FileNotFoundError(f"File path {filename} does not exist.") - mat_data = loadmat(filename) - header = mat_data["header"][0] - data_type = header.split()[0] + loaded_data = data_loader(filename) + data_type = loaded_data["header"] if data_type not in ["tensor", "sptensor", "matrix", "ktensor"]: - raise ValueError(f"Invalid data type found: {data_type}") + raise ValueError(f"Invalid data type found: '{data_type}'") + if data_type == "tensor": - data = mat_data["data"] + data = loaded_data["data"] return ttb.tensor(data) elif data_type == "sptensor": - shape = tuple(mat_data["shape"][0]) - subs = mat_data["subs"] - index_base - vals = mat_data["vals"] + shape = loaded_data["shape"] + subs = loaded_data["subs"] - index_base + vals = loaded_data["vals"] return ttb.sptensor(subs, vals, shape) elif data_type == "matrix": - data = mat_data["data"] + data = loaded_data["data"] return data elif data_type == "ktensor": - factor_matrices = [ - mat_data["factor_matrices"][0, n] - for n in range(mat_data["factor_matrices"].shape[1]) - ] - weights = mat_data["weights"].flatten() + factor_matrices = loaded_data["factor_matrices"] + weights = loaded_data["weights"] return ttb.ktensor(factor_matrices, weights) + raise ValueError(f"Invalid data type found: {data_type}")