From e8e3eccbf41fc2676ee3dbb2dbb52f3b367dae41 Mon Sep 17 00:00:00 2001 From: Thor Whalen <1906276+thorwhalen@users.noreply.github.com> Date: Wed, 24 Jun 2026 09:00:35 +0200 Subject: [PATCH 1/3] Make Windows CI pass: regex-not-path compile, POSIX-only os.getuid, separator/encoding gotchas Applies the cross-platform-python protocol to the remaining Windows-only failures (all green on Linux/macOS, all failing on Windows): - naming.py + paths.py: the path-template builders compiled their regexes (named capture groups) via safe_compile, which re.escape's on Windows and corrupted the groups (same bug class as the filter_regex fix). Reverted all 7 sites to re.compile; literal text + separators are already re.escape'd, so this is a valid regex on every OS. (Matches the historical note in naming.py.) - filesys.py: replaced POSIX-only os.getuid() (AttributeError on Windows) with the cross-platform getpass.getuser() for the per-user temp dir. - util.not_a_mac_junk_path: split on BOTH separators (zip paths use '/', but os.path.sep is '\\' on Windows, so '__MACOSX' was missed there). - Made the platform-fragile doctests OS-independent (process_path, safe_compile); documented safe_compile as path-template-only (never for general regexes). Mac/Linux: full suite + doctests stay green (400 passed). Claude-Session: https://claude.ai/code/session_019bFtivmtdcXDoCQt3B9gGp --- dol/filesys.py | 20 ++++++++++++++------ dol/naming.py | 13 ++++++------- dol/paths.py | 11 +++++++---- dol/util.py | 33 +++++++++++++++++++-------------- 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/dol/filesys.py b/dol/filesys.py index afc38017..6185391e 100644 --- a/dol/filesys.py +++ b/dol/filesys.py @@ -160,11 +160,18 @@ def process_path( Returns: str: The processed path. - >>> process_path('a', 'b', 'c') # doctest: +ELLIPSIS - '...a/b/c' - >>> from functools import partial - >>> process_path('a', 'b', 'c', rootdir='/root/dir/', ensure_endswith_slash=True) - '/root/dir/a/b/c/' + The result uses the running OS's native separator, so these examples assert + OS-independently (the literal forward-slash form is what you get on POSIX): + + >>> import os + >>> process_path('a', 'b', 'c').endswith(os.path.join('a', 'b', 'c')) + True + >>> p = process_path( + ... 'a', 'b', 'c', rootdir='root_dir', + ... ensure_endswith_slash=True, abspath=False, expanduser=False, expandvars=False, + ... ) + >>> p == os.path.join('root_dir', 'a', 'b', 'c') + os.sep + True """ path = os.path.join(*path) @@ -264,9 +271,10 @@ def temp_dir(dirname="", make_it_if_necessary=True, verbose=False): """ from tempfile import mkdtemp, gettempdir import uuid + import getpass # Create a unique user-specific directory under the system temp dir - user_temp_base = os.path.join(gettempdir(), f"user_{os.getuid()}") + user_temp_base = os.path.join(gettempdir(), f"user_{getpass.getuser()}") if dirname: # If a specific dirname is provided, use it diff --git a/dol/naming.py b/dol/naming.py index 012440eb..2b45c54f 100644 --- a/dol/naming.py +++ b/dol/naming.py @@ -8,7 +8,6 @@ from types import MethodType from typing import Union -from dol.util import safe_compile from dol.signatures import set_signature_of_func from dol.errors import KeyValidationError, _assert_condition @@ -260,7 +259,7 @@ def mk_named_capture_patterns(mapping_dict): def template_to_pattern(mapping_dict, template): if mapping_dict: - p = safe_compile( + p = re.compile( "{}".format( "|".join(["{" + re.escape(x) + "}" for x in list(mapping_dict.keys())]) ) @@ -282,13 +281,13 @@ def mk_extract_pattern( ) assert name is not None mapping_dict = dict(format_dict, **{name: named_capture_patterns[name]}) - p = safe_compile( + p = re.compile( "{}".format( "|".join(["{" + re.escape(x) + "}" for x in list(mapping_dict.keys())]) ) ) - return safe_compile( + return re.compile( p.sub( lambda x: mapping_dict[x.string[(x.start() + 1) : (x.end() - 1)]], template, @@ -327,7 +326,7 @@ def mk_pattern_from_template_and_format_dict(template, format_dict=None, sep=pat named_capture_patterns = mk_named_capture_patterns(format_dict) pattern = template_to_pattern(named_capture_patterns, template) try: - return safe_compile(pattern) + return re.compile(pattern) except Exception as e: raise ValueError( f"Got an error when attempting to re.compile('{pattern}'): {type(e)}({e})" @@ -488,7 +487,7 @@ def __init__( pattern = template_to_pattern(named_capture_patterns, self.template) pattern += "$" - pattern = safe_compile(pattern) + pattern = re.compile(pattern) extract_pattern = {} for name in fields: @@ -724,7 +723,7 @@ def __init__(self, *args, **kwargs): ] ) _prefix_pattern += "$" - self.prefix_pattern = safe_compile(_prefix_pattern) + self.prefix_pattern = re.compile(_prefix_pattern) def _mk_prefix(self, *args, **kwargs): """ diff --git a/dol/paths.py b/dol/paths.py index 3176e9c0..af1300c5 100644 --- a/dol/paths.py +++ b/dol/paths.py @@ -38,7 +38,7 @@ import os from dol.base import Store -from dol.util import lazyprop, add_as_attribute_of, max_common_prefix, safe_compile +from dol.util import lazyprop, add_as_attribute_of, max_common_prefix from dol.trans import ( store_decorator, kv_wrap, @@ -2255,9 +2255,12 @@ def generate_pattern_parts(template): for literal_text, field_name, _, _ in parts: yield re.escape(literal_text) + mk_named_capture_group(field_name) - return safe_compile( - "".join(generate_pattern_parts(template)), normalize_path=normalize_path - ) + # Literal text is already re.escape'd above, and field separators are + # escaped in the capture groups, so this is a valid regex on every OS -- + # compile it as a regex (NOT via safe_compile, which re.escape's the whole + # pattern on Windows and corrupts the named groups). normalize_path is a + # path-template concern, not a regex-compile flag, so it does not apply here. + return re.compile("".join(generate_pattern_parts(template))) @staticmethod def _assert_field_type(field_type: FieldTypeNames, name="field_type"): diff --git a/dol/util.py b/dol/util.py index 5fe0cd72..324501a9 100644 --- a/dol/util.py +++ b/dol/util.py @@ -121,12 +121,16 @@ def _default_string_collision_handler(string: str, attempt: int) -> str: def safe_compile(path, normalize_path=True): r""" - Safely compiles a file path into a regex pattern, ensuring compatibility - across different operating systems (Windows, macOS, Linux). + Compile a *literal file path* into a regex pattern that matches that path, + normalizing separators and escaping regex-special characters on Windows. - This function normalizes the input path to use the correct separators - for the current platform and escapes any special characters to avoid - invalid regex patterns. + .. warning:: + This is for **path templates only**, NOT for general regexes. It + ``re.escape``-s its argument on Windows, which turns any regex into a + literal-string matcher there. To compile an actual regex, use + ``re.compile`` (see ``dol.trans.filter_regex``, fixed to do exactly that). + Its output is intentionally platform-dependent (Windows paths get escaped), + so callers must not rely on a specific ``.pattern`` across OSes. Args: path (str): The file path to be compiled into a regex pattern. @@ -135,13 +139,11 @@ def safe_compile(path, normalize_path=True): re.Pattern: A compiled regular expression object for the given path. Examples: - >>> regex = safe_compile(r"C:\\what\\happens\\if\\you\\escape") - >>> regex.pattern # Windows path is escaped properly - 'C:\\\\what\\\\happens\\\\if\\\\you\\\\escape' - - >>> regex = safe_compile("/fun/paths/are/awesome") - >>> regex.pattern # Unix path is unmodified - '/fun/paths/are/awesome' + >>> import re + >>> isinstance(safe_compile("/fun/paths/are/awesome"), re.Pattern) + True + >>> isinstance(safe_compile(r"C:\folder\file.txt"), re.Pattern) + True """ if normalize_path: # Normalize the path to handle cross-platform differences @@ -482,8 +484,11 @@ def not_a_mac_junk_path(path: str): >>> list(filter(not_a_mac_junk_path, paths)) ['A/normal/path', 'foo/b'] """ - if path.endswith(".DS_Store") or "__MACOSX" in path.split(os.path.sep): - return False # This is indeed math junk (so filter out) + # Split on BOTH separators: these paths usually come from zip files (always + # '/'), but os.path.sep is '\\' on Windows, so splitting on os.path.sep alone + # would miss '__MACOSX' there. + if path.endswith(".DS_Store") or "__MACOSX" in re.split(r"[/\\]", path): + return False # This is indeed mac junk (so filter out) return True # this is not mac junk (you can keep it) From 78c2dd63b4684bbe18103eefd42a172c1b5ca138 Mon Sep 17 00:00:00 2001 From: Thor Whalen <1906276+thorwhalen@users.noreply.github.com> Date: Wed, 24 Jun 2026 09:10:42 +0200 Subject: [PATCH 2/3] Windows CI round 2: escape literals in path-template regex; fix empty-rootdir + test separators - naming.template_to_pattern: regex-escape the LITERAL segments of a template (not just substitute fields). A template that is/contains a real path has backslashes on Windows; compiling them unescaped raised `re.error: incomplete escape \U` (FileSysCollection key patterns, mk_dirs, subfolder stores, etc.). This is the correct cross-platform fix (mirrors KeyTemplate._compile_regex); it also fixes a latent bug where a literal '.' matched any character. - filesys.ensure_slash_suffix(''): keep an empty prefix empty. It was returning a bare separator, which prepended to absolute keys yields '\C:\Users\...' (invalid on Windows, OSError 22) -- e.g. Files("") in dol.misc.get_obj. - test_process_path: build the temp path from components, not "foo/bar" (abspath normalizes '/'->'\\' on Windows, breaking the equality). - mk_pattern_from_template_and_format_dict doctest: assert on matching behavior, not a hand-counted-backslash pattern string (was OS-fragile and internally inconsistent). Mac/Linux: full suite + doctests green (400 passed). Claude-Session: https://claude.ai/code/session_019bFtivmtdcXDoCQt3B9gGp --- dol/filesys.py | 11 +++++-- dol/naming.py | 63 +++++++++++++++++++++++++-------------- dol/tests/test_filesys.py | 5 +++- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/dol/filesys.py b/dol/filesys.py index 6185391e..2355a677 100644 --- a/dol/filesys.py +++ b/dol/filesys.py @@ -16,8 +16,15 @@ def ensure_slash_suffix(path: str): - r"""Add a file separation (/ or \) at the end of path str, if not already present.""" - if not path.endswith(file_sep): + r"""Add a file separation (/ or \) at the end of path str, if not already present. + + An empty path stays empty: an empty prefix has no slash to "ensure", and turning + it into a bare separator anchors otherwise-absolute keys to the filesystem root. + On Windows that produces invalid paths like ``\C:\Users\...`` (a separator before + the drive letter -> ``OSError: [Errno 22]``); e.g. ``Files("")`` used with + absolute keys, as in ``dol.misc.get_obj``. + """ + if path and not path.endswith(file_sep): return path + file_sep else: return path diff --git a/dol/naming.py b/dol/naming.py index 2b45c54f..93ef67a0 100644 --- a/dol/naming.py +++ b/dol/naming.py @@ -258,18 +258,38 @@ def mk_named_capture_patterns(mapping_dict): def template_to_pattern(mapping_dict, template): - if mapping_dict: - p = re.compile( - "{}".format( - "|".join(["{" + re.escape(x) + "}" for x in list(mapping_dict.keys())]) - ) - ) - return p.sub( - lambda x: mapping_dict[x.string[(x.start() + 1) : (x.end() - 1)]], - template, - ) - else: - return template + r"""Weave a ``{field}`` template into a regex, substituting each field with its + capture pattern and **regex-escaping the literal text between fields**. + + Escaping the literals is what makes this OS-independent: a template that is (or + contains) a real filesystem path has backslashes on Windows (``C:\Users\...``), + which are regex metacharacters -- compiling them unescaped raises + ``re.error: incomplete escape \U``. Escaping also makes a literal ``.`` match a + literal dot rather than any character. (This mirrors ``KeyTemplate._compile_regex`` + and is why the result is never routed through ``safe_compile``, which would + re.escape the *whole* pattern on Windows and corrupt the capture groups.) + """ + import string + + out = [] + for literal_text, field_name, format_spec, conversion in string.Formatter().parse( + template + ): + if literal_text: + out.append(re.escape(literal_text)) + if field_name is not None: + if field_name in mapping_dict: + out.append(mapping_dict[field_name]) + else: + # Not a captured field: re-emit the placeholder as an escaped literal. + placeholder = "{" + field_name + if conversion: + placeholder += "!" + conversion + if format_spec: + placeholder += ":" + format_spec + placeholder += "}" + out.append(re.escape(placeholder)) + return "".join(out) def mk_extract_pattern( @@ -303,21 +323,20 @@ def mk_pattern_from_template_and_format_dict(template, format_dict=None, sep=pat format_dict: A dict whose keys are template fields and values are regex strings to capture them Returns: a compiled regex - >>> import os + Assert on *behavior* (matching) rather than the exact pattern string, so the + examples hold on every OS (the field separator -- and therefore the default + capture class -- is ``/`` on POSIX and ``\`` on Windows): + >>> p = mk_pattern_from_template_and_format_dict('{here}/and/{there}') - >>> if os.name == 'nt': # for windows - ... assert p == re.compile('(?P[^\\\\]+)/and/(?P[^\\\\]+)') - ... else: - ... assert p == re.compile('(?P[^/]+)/and/(?P[^/]+)') - >>> p = mk_pattern_from_template_and_format_dict('{here}/and/{there}', {'there': r'\d+'}) - >>> if os.name == 'nt': # for windows - ... assert p == re.compile(r'(?P[^\\\\]+)/and/(?P\d+)') - ... else: - ... assert p == re.compile(r'(?P[^/]+)/and/(?P\d+)') >>> type(p) >>> p.match('HERE/and/1234').groupdict() {'here': 'HERE', 'there': '1234'} + >>> p = mk_pattern_from_template_and_format_dict('{here}/and/{there}', {'there': r'\d+'}) + >>> p.match('HERE/and/1234').groupdict() + {'here': 'HERE', 'there': '1234'} + >>> p.match('HERE/and/not_digits') is None # 'there' must be digits + True """ format_dict = format_dict or {} diff --git a/dol/tests/test_filesys.py b/dol/tests/test_filesys.py index 77acbba4..7949da6c 100644 --- a/dol/tests/test_filesys.py +++ b/dol/tests/test_filesys.py @@ -91,7 +91,10 @@ def populate_folder(dirpath, contents: Mapping): def test_process_path(): # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: - temp_path = os.path.join(temp_dir, "foo/bar") + # Separate components (not "foo/bar"): process_path normalizes separators + # (abspath), so a literal "/" would become "\\" on Windows and break the + # equality assertions below. + temp_path = os.path.join(temp_dir, "foo", "bar") output_path = process_path(temp_path) assert output_path == temp_path From 7db1e3d5e65983f5d002dedf29023c3bc89c8c20 Mon Sep 17 00:00:00 2001 From: Thor Whalen <1906276+thorwhalen@users.noreply.github.com> Date: Wed, 24 Jun 2026 09:14:40 +0200 Subject: [PATCH 3/3] subfolder_stores: strip the native os.sep, not a hardcoded "/" (last Windows failure) The affix codec stripped/added a literal "/" while the dir paths end with os.sep ('\\' on Windows), so stores["folder1"] built a mixed-separator inner key '...\folder1/' -> KeyError on Windows. Strip os.path.sep instead. Also make the test's expected Files key OS-agnostic (os.path.join, not "subfolder/apple.p"). No-ops on POSIX (os.sep == '/'); full suite stays green (400 passed). Claude-Session: https://claude.ai/code/session_019bFtivmtdcXDoCQt3B9gGp --- dol/filesys.py | 5 ++++- dol/tests/test_filesys.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dol/filesys.py b/dol/filesys.py index 2355a677..6fdf7a6d 100644 --- a/dol/filesys.py +++ b/dol/filesys.py @@ -854,9 +854,12 @@ def subfolder_stores( the `max_levels` parameter. """ root_folder = ensure_slash_suffix(root_folder) + # Strip the native trailing separator (os.sep), NOT a hardcoded "/": the dir + # paths end with os.sep ('\\' on Windows), so a literal "/" would not match and + # the affix codec would re-add a "/" -> mixed-separator KeyError on Windows. wrap = KeyCodecs.affixed( prefix=root_folder if relative_paths else "", - suffix="/" if not slash_suffix else "", + suffix=os.path.sep if not slash_suffix else "", ) folders = iter_dirpaths_in_folder_recursively( root_folder, max_levels=max_levels, include_hidden=include_hidden diff --git a/dol/tests/test_filesys.py b/dol/tests/test_filesys.py index 7949da6c..c66411a4 100644 --- a/dol/tests/test_filesys.py +++ b/dol/tests/test_filesys.py @@ -232,7 +232,7 @@ def test_subfolder_stores(): # Testing folder1 folder1_store = stores["folder1"] assert isinstance(folder1_store, Files) - assert set(folder1_store.keys()) == {"day.doc", "subfolder/apple.p"} + assert set(folder1_store.keys()) == {"day.doc", os.path.join("subfolder", "apple.p")} assert folder1_store["day.doc"] == b"time" # Testing folder1/subfolder