From fe7bee080328d23e4484b733e6f82b467142466b Mon Sep 17 00:00:00 2001 From: Matthias Arzt Date: Fri, 24 Oct 2025 16:01:50 +0200 Subject: [PATCH 1/7] refactor usage of temporary files in multipers.io The code can be simplified by using python's "tempfile" library. Which has some benefits: * A system dependent temp directory is automatically selected. For example '/tmp/' on linux. * Temp files are automatically deleted. * Users can customize the temp directory using the environment variable "TMPDIR". --- multipers/io.pyx | 120 ++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 74 deletions(-) diff --git a/multipers/io.pyx b/multipers/io.pyx index 6dc86b0a..4018a591 100644 --- a/multipers/io.pyx +++ b/multipers/io.pyx @@ -1,4 +1,5 @@ import re +import tempfile from gudhi import SimplexTree import gudhi as gd import numpy as np @@ -106,16 +107,6 @@ cdef dict[str,str|None] pathes = { # multi_chunk_out_path:str|os.PathLike = "multipers_multi_chunk_output.scc" # function_delaunay_out_path:str|os.PathLike = "function_delaunay_output.scc" # function_delaunay_in_path:str|os.PathLike = "function_delaunay_input.txt" # point cloud -input_path:str|os.PathLike = "multipers_input.scc" -output_path:str|os.PathLike = "multipers_output.scc" - -def _put_temp_files_to_ram(): - global input_path,output_path - shm_memory = "/tmp/" # on unix, we can write in RAM instead of disk. - if os.access(shm_memory, os.W_OK) and not input_path.startswith(shm_memory): - input_path = shm_memory + input_path - output_path = shm_memory + output_path -_put_temp_files_to_ram() ## TODO : optimize with Python.h ? @@ -309,7 +300,6 @@ def scc_reduce_from_str_to_slicer( bool full_resolution=True, int dimension: int | np.int64 = 1, bool clear: bool = True, - id: Optional[str] = None, # For parallel stuff bool verbose:bool=False, backend:Literal["mpfree","multi_chunk","twopac"]="mpfree", shift_dimension=0 @@ -327,43 +317,38 @@ def scc_reduce_from_str_to_slicer( verbose: bool backend: "mpfree", "multi_chunk" or "2pac" """ - global pathes, input_path, output_path + global pathes _init_external_softwares(requires=[backend]) + with tempfile.TemporaryDirectory(prefix="multipers", delete=clear) as tmpdir: + output_path = os.path.join(tmpdir, "multipers_output.scc") + + resolution_str = "--resolution" if full_resolution else "" + + if not os.path.exists(path): + raise ValueError(f"No file found at {path}.") + + verbose_arg = "> /dev/null 2>&1" if not verbose else "" + if backend == "mpfree": + more_verbose = "-v" if verbose else "" + command = ( + f"{pathes[backend]} {more_verbose} {resolution_str} --dim={dimension} {path} {output_path} {verbose_arg}" + ) + elif backend == "multi_chunk": + command = ( + f"{pathes[backend]} {path} {output_path} {verbose_arg}" + ) + elif backend in ["twopac", "2pac"]: + command = ( + f"{pathes[backend]} -f {path} --scc-input -n{dimension} --save-resolution-scc {output_path} {verbose_arg}" + ) + else: + raise ValueError(f"Unsupported backend {backend}.") + if verbose: + print(f"Calling :\n\n {command}") + os.system(command) - resolution_str = "--resolution" if full_resolution else "" - # print(mpfree_in_path + id, mpfree_out_path + id) - if id is None: - id = str(threading.get_native_id()) - if not os.path.exists(path): - raise ValueError(f"No file found at {path}.") - if os.path.exists(output_path + id): - os.remove(output_path + id) - verbose_arg = "> /dev/null 2>&1" if not verbose else "" - if backend == "mpfree": - more_verbose = "-v" if verbose else "" - command = ( - f"{pathes[backend]} {more_verbose} {resolution_str} --dim={dimension} {path} {output_path+id} {verbose_arg}" - ) - elif backend == "multi_chunk": - command = ( - f"{pathes[backend]} {path} {output_path+id} {verbose_arg}" - ) - elif backend in ["twopac", "2pac"]: - command = ( - f"{pathes[backend]} -f {path} --scc-input -n{dimension} --save-resolution-scc {output_path+id} {verbose_arg}" - ) - else: - raise ValueError(f"Unsupported backend {backend}.") - if verbose: - print(f"Calling :\n\n {command}") - os.system(command) - - slicer._build_from_scc_file(path=output_path+id, shift_dimension=shift_dimension) - - if clear: - clear_io(input_path+id, output_path + id) - + slicer._build_from_scc_file(path=output_path, shift_dimension=shift_dimension) # def reduce_complex( # complex, # Simplextree, Slicer, or str @@ -474,7 +459,6 @@ def function_delaunay_presentation_to_slicer( slicer, point_cloud:np.ndarray, function_values:np.ndarray, - id:Optional[str] = None, bool clear:bool = True, bool verbose:bool=False, int degree = -1, @@ -491,38 +475,26 @@ def function_delaunay_presentation_to_slicer( degree: computes minimal presentation of this degree if given verbose : bool """ - if id is None: - id = str(threading.get_native_id()) - global input_path, output_path, pathes - backend = "function_delaunay" - _init_external_softwares(requires=[backend]) - - to_write = np.concatenate([point_cloud, function_values.reshape(-1,1)], axis=1) - np.savetxt(input_path+id,to_write,delimiter=' ') - verbose_arg = "> /dev/null 2>&1" if not verbose else "" - degree_arg = f"--minpres {degree}" if degree >= 0 else "" - multi_chunk_arg = "--multi-chunk" if multi_chunk else "" - if os.path.exists(output_path + id): - os.remove(output_path+ id) - command = f"{pathes[backend]} {degree_arg} {multi_chunk_arg} {input_path+id} {output_path+id} {verbose_arg} --no-delaunay-compare" - if verbose: - print(command) - os.system(command) - - slicer._build_from_scc_file(path=output_path+id, shift_dimension=-1 if degree <= 0 else degree-1 ) - - if clear: - clear_io(output_path + id, input_path + id) + global pathes + with tempfile.TemporaryDirectory(prefix="multipers", delete=clear) as tmpdir: + input_path = os.path.join(tmpdir, "multipers_input.scc") + output_path = os.path.join(tmpdir, "multipers_output.scc") + backend = "function_delaunay" + _init_external_softwares(requires=[backend]) -def clear_io(*args): - """Removes temporary files""" - global input_path,output_path - for x in [input_path,output_path] + list(args): - if os.path.exists(x): - os.remove(x) + to_write = np.concatenate([point_cloud, function_values.reshape(-1,1)], axis=1) + np.savetxt(input_path,to_write,delimiter=' ') + verbose_arg = "> /dev/null 2>&1" if not verbose else "" + degree_arg = f"--minpres {degree}" if degree >= 0 else "" + multi_chunk_arg = "--multi-chunk" if multi_chunk else "" + command = f"{pathes[backend]} {degree_arg} {multi_chunk_arg} {input_path} {output_path} {verbose_arg} --no-delaunay-compare" + if verbose: + print(command) + os.system(command) + slicer._build_from_scc_file(path=output_path, shift_dimension=-1 if degree <= 0 else degree-1 ) From b4b5ab5e27ccdd42523097ba30ae53b7c64134da Mon Sep 17 00:00:00 2001 From: Matthias Arzt Date: Fri, 24 Oct 2025 16:38:09 +0200 Subject: [PATCH 2/7] fix: don't call outdated method --- multipers/io.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/multipers/io.pyx b/multipers/io.pyx index 4018a591..8d1471cf 100644 --- a/multipers/io.pyx +++ b/multipers/io.pyx @@ -212,7 +212,6 @@ def _init_external_softwares(requires=[]): any = any or (soft in requires) if any: - _put_temp_files_to_ram() for soft in requires: if pathes[soft] is None: global doc_soft_urls From 974a6bb2072c0c1bdb308dab894a8bbbb4788920 Mon Sep 17 00:00:00 2001 From: Matthias Arzt Date: Fri, 24 Oct 2025 17:37:44 +0200 Subject: [PATCH 3/7] fix: temp file usage in slicer.pyx.tp multipers.io no longer has a global variable input_path --- multipers/slicer.pyx.tp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/multipers/slicer.pyx.tp b/multipers/slicer.pyx.tp index 273fca6e..7ade75bb 100644 --- a/multipers/slicer.pyx.tp +++ b/multipers/slicer.pyx.tp @@ -870,7 +870,8 @@ def minimal_presentation( and returns it as a slicer. Backends differents than `mpfree` are unstable. """ - from multipers.io import _init_external_softwares, input_path, scc_reduce_from_str_to_slicer + from multipers.io import _init_external_softwares, scc_reduce_from_str_to_slicer + import tempfile if is_slicer(slicer) and slicer.is_minpres and not force: from warnings import warn warn(f"(unnecessary computation) The slicer seems to be already reduced, from homology of degree {slicer.minpres_degree}.") @@ -891,19 +892,21 @@ def minimal_presentation( if dtype is None: dtype = slicer.dtype dimension = slicer.dimension - degree # latest = L-1, which is empty, -1 for degree 0, -2 for degree 1 etc. - slicer.to_scc(path=input_path+id, strip_comments=True, degree=degree-1, unsqueeze = False) - new_slicer = multipers.Slicer(None,backend=slicer_backend, vineyard=vineyard, dtype=dtype) - if backend=="mpfree": - shift_dimension=degree-1 - else: - shift_dimension=degree - scc_reduce_from_str_to_slicer(path=input_path+id, slicer=new_slicer, dimension=dimension, backend=backend, shift_dimension=shift_dimension, **minpres_kwargs) - - new_slicer.minpres_degree = degree - new_slicer.filtration_grid = slicer.filtration_grid if slicer.is_squeezed else None - if new_slicer.is_squeezed and auto_clean: - new_slicer = new_slicer._clean_filtration_grid() - return new_slicer + with tempfile.TemporaryDirectory(prefix="multipers") as tmpdir: + tmp_path = os.path.join(tmpdir, "multipers.scc") + slicer.to_scc(path=tmp_path, strip_comments=True, degree=degree-1, unsqueeze = False) + new_slicer = multipers.Slicer(None,backend=slicer_backend, vineyard=vineyard, dtype=dtype) + if backend=="mpfree": + shift_dimension=degree-1 + else: + shift_dimension=degree + scc_reduce_from_str_to_slicer(path=tmp_path, slicer=new_slicer, dimension=dimension, backend=backend, shift_dimension=shift_dimension, **minpres_kwargs) + + new_slicer.minpres_degree = degree + new_slicer.filtration_grid = slicer.filtration_grid if slicer.is_squeezed else None + if new_slicer.is_squeezed and auto_clean: + new_slicer = new_slicer._clean_filtration_grid() + return new_slicer def to_simplextree(s:Slicer_type, max_dim:int=-1) -> SimplexTreeMulti_type: From 36bfa42e309b2508eb024007a32b9ad081271a6d Mon Sep 17 00:00:00 2001 From: David Loiseaux Date: Mon, 27 Oct 2025 14:12:18 +0100 Subject: [PATCH 4/7] rm: superflouous id --- multipers/slicer.pyx.tp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/multipers/slicer.pyx.tp b/multipers/slicer.pyx.tp index 7ade75bb..9ecae1ab 100644 --- a/multipers/slicer.pyx.tp +++ b/multipers/slicer.pyx.tp @@ -645,7 +645,6 @@ cdef class {{D['PYTHON_TYPE']}}: str backend:Literal["mpfree", "2pac"]="mpfree", str slicer_backend:Literal["matrix","clement","graph"]="matrix", bool vineyard={{D['IS_VINE']}}, - id :Optional[str] = None, dtype = {{D['PY_VALUE_TYPE']}}, **minpres_kwargs )->Slicer_type: @@ -653,7 +652,7 @@ cdef class {{D['PYTHON_TYPE']}}: Computes the minimal presentation of the slicer, and returns it as a new slicer. See :func:`multipers.slicer.minimal_presentation`. """ - new_slicer = minimal_presentation(self, degree=degree, degrees=degrees, backend=backend, slicer_backend=slicer_backend, dtype=dtype, vineyard=vineyard, id=id, **minpres_kwargs) + new_slicer = minimal_presentation(self, degree=degree, degrees=degrees, backend=backend, slicer_backend=slicer_backend, dtype=dtype, vineyard=vineyard, **minpres_kwargs) return new_slicer @property @@ -858,7 +857,6 @@ def minimal_presentation( str backend:Literal["mpfree", "2pac", ""]="mpfree", str slicer_backend:Literal["matrix","clement","graph"]="matrix", bool vineyard=True, - id :Optional[str] =None, dtype:type|_valid_dtypes=None, int n_jobs = -1, bool force=False, @@ -887,8 +885,6 @@ def minimal_presentation( assert degree>=0, f"Degree not provided." if not np.any(slicer.get_dimensions() == degree): return type(slicer)() - if id is None: - id = str(threading.get_native_id()) if dtype is None: dtype = slicer.dtype dimension = slicer.dimension - degree # latest = L-1, which is empty, -1 for degree 0, -2 for degree 1 etc. From 8cb3dbabea104eed5f9c3def518fd57f7e4baef0 Mon Sep 17 00:00:00 2001 From: David Loiseaux Date: Mon, 27 Oct 2025 14:40:48 +0100 Subject: [PATCH 5/7] tests: add parallel test --- tests/test_parallel.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/test_parallel.py diff --git a/tests/test_parallel.py b/tests/test_parallel.py new file mode 100644 index 00000000..14d9f911 --- /dev/null +++ b/tests/test_parallel.py @@ -0,0 +1,35 @@ +import numpy as np +import pytest +from joblib import Parallel, delayed + +import multipers as mp +import multipers.io as mio +from multipers.tests import assert_sm_pair + +np.random.seed(0) + + +def io_fd_mpfree(x): + s = mp.filtrations.DelaunayCodensity(points=x, bandwidth=0.2) + s = s.minpres(1).to_colexical() + return mp.signed_measure(s, degree=1)[0] + + +def io_fd_mpfree2(x): + s = mp.filtrations.DelaunayCodensity(points=x, bandwidth=0.2) + return mp.signed_measure(s, degree=1)[0] + + +@pytest.mark.skipif( + not (mio._check_available("mpfree") and mio._check_available("function_delaunay")), + reason="Skipped external test as `function_delaunay`, `mpfree` were not found.", +) +@pytest.mark.parametrize("backend", ["loky", "threading"]) +def test_io_parallel(backend): + x = mp.data.three_annulus(100, 50) + X = [x] * 100 + ground_truth = io_fd_mpfree2(x) + dgms = Parallel(n_jobs=-1, backend=backend)(delayed(io_fd_mpfree)(x) for x in X) + + for dgm in dgms: + assert_sm_pair(ground_truth, dgm, exact=False, reg=0) From fd6d0fe21a6b37d8579946ef2e8205d33d9f5aad Mon Sep 17 00:00:00 2001 From: David Loiseaux Date: Mon, 27 Oct 2025 14:42:59 +0100 Subject: [PATCH 6/7] tests: parallel. force hilbert --- tests/test_parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 14d9f911..cc69acc1 100644 --- a/tests/test_parallel.py +++ b/tests/test_parallel.py @@ -12,12 +12,12 @@ def io_fd_mpfree(x): s = mp.filtrations.DelaunayCodensity(points=x, bandwidth=0.2) s = s.minpres(1).to_colexical() - return mp.signed_measure(s, degree=1)[0] + return mp.signed_measure(s, degree=1, invariant="hilbert")[0] def io_fd_mpfree2(x): s = mp.filtrations.DelaunayCodensity(points=x, bandwidth=0.2) - return mp.signed_measure(s, degree=1)[0] + return mp.signed_measure(s, degree=1, invariant="hilbert")[0] @pytest.mark.skipif( From 2a8eb618feb3753980745bc5eaf2994fd45195c9 Mon Sep 17 00:00:00 2001 From: David Loiseaux Date: Mon, 27 Oct 2025 14:44:22 +0100 Subject: [PATCH 7/7] tests: parallel. forgot max_error --- tests/test_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index cc69acc1..263b0f71 100644 --- a/tests/test_parallel.py +++ b/tests/test_parallel.py @@ -32,4 +32,4 @@ def test_io_parallel(backend): dgms = Parallel(n_jobs=-1, backend=backend)(delayed(io_fd_mpfree)(x) for x in X) for dgm in dgms: - assert_sm_pair(ground_truth, dgm, exact=False, reg=0) + assert_sm_pair(ground_truth, dgm, exact=False, reg=0, max_error= 1e-10)