diff --git a/.gitignore b/.gitignore index 508d57ca9d6..a4a37f36b16 100644 --- a/.gitignore +++ b/.gitignore @@ -12,8 +12,6 @@ examples/ .venv venv requirements.txt -.pyi_generator_last_run -.pyi_generator_diff reflex.db .codspeed .env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a06454f5d16..cd53f99c595 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,6 +31,7 @@ repos: entry: python3 scripts/make_pyi.py language: system always_run: true + pass_filenames: false require_serial: true - repo: local hooks: diff --git a/AGENTS.md b/AGENTS.md index 3c9593ff624..10910290a47 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -85,7 +85,7 @@ Playwright tests use the `page` fixture and navigate to `harness.frontend_url`. ## .pyi stubs -When adding/modifying components: `uv run python scripts/make_pyi.py`. Commit `pyi_hashes.json` (not `.pyi` files). If the diff removes many modules, run `uv sync`, delete `.pyi_generator_last_run`, and regenerate. +When components change, run `uv run python scripts/make_pyi.py` and commit only `pyi_hashes.json`. It incrementally regenerates changed stubs and dependent subclasses. ## Breaking changes and deprecation diff --git a/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py b/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py index 8ca81b2d0e7..2c3277a72be 100644 --- a/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py +++ b/packages/hatch-reflex-pyi/src/hatch_reflex_pyi/plugin.py @@ -67,8 +67,17 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: # Run from src/ so _path_to_module_name produces valid import names # (e.g. "reflex_components_core.core.banner" instead of # "packages.reflex-components-core.src.reflex_components_core.core.banner"). + # --no-update-hashes: parallel workspace builds would race on the shared + # pyi_hashes.json; that file is a dev-tooling artifact and isn't needed + # at install/wheel-build time. 
subprocess.run( - [sys.executable, "-m", "reflex_base.utils.pyi_generator", src_dir.name], + [ + sys.executable, + "-m", + "reflex_base.utils.pyi_generator", + "--no-update-hashes", + src_dir.name, + ], cwd=src_dir.parent, check=True, ) diff --git a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py index 0425f03d7b1..4bc1342392e 100644 --- a/packages/reflex-base/src/reflex_base/utils/pyi_generator.py +++ b/packages/reflex-base/src/reflex_base/utils/pyi_generator.py @@ -1599,15 +1599,72 @@ def _scan_file(module_path: Path) -> tuple[str, str] | None: return str(module_path.with_suffix(".pyi").resolve()), content_hash +def _update_pyi_hashes_file( + written_files: list[tuple[str, str]], + scanned_sources: list[Path], +) -> None: + """Merge a scan's results into the workspace ``pyi_hashes.json``. + + Walks up from the current working directory to find an existing + ``pyi_hashes.json`` (the file is workspace-scoped — one per repo). If none + exists, a fresh one is created in cwd. Existing entries survive unless + their source file disappeared, or their source was scanned this run but no + longer produces a stub. Newly produced hashes are written in. + + Args: + written_files: ``(absolute pyi path, content md5)`` tuples for stubs + written this run. + scanned_sources: Absolute ``.py`` paths that were scanned this run, + including those that produced no stub. An entry whose source was + scanned but produced nothing this run is dropped from the file. 
+ """ + written_paths = [Path(p) for p, _ in written_files] + hashes = [h for _, h in written_files] + + pyi_hashes_parent = Path.cwd().resolve() + while ( + pyi_hashes_parent != pyi_hashes_parent.parent + and not (pyi_hashes_parent / PYI_HASHES).exists() + ): + pyi_hashes_parent = pyi_hashes_parent.parent + + pyi_hashes_file = pyi_hashes_parent / PYI_HASHES + if pyi_hashes_file.exists(): + existing = json.loads(pyi_hashes_file.read_text()) + else: + pyi_hashes_file = (Path.cwd() / PYI_HASHES).resolve() + pyi_hashes_parent = pyi_hashes_file.parent + existing = {} + + produced = { + p.relative_to(pyi_hashes_parent).as_posix(): h + for p, h in zip(written_paths, hashes, strict=True) + } + scanned = { + source.with_suffix(".pyi").relative_to(pyi_hashes_parent).as_posix() + for source in scanned_sources + if source.with_suffix(".pyi").is_relative_to(pyi_hashes_parent) + } + pyi_hashes = { + entry: produced.get(entry, current) + for entry, current in existing.items() + if (entry in produced or entry not in scanned) + and (pyi_hashes_parent / entry).with_suffix(".py").exists() + } + for entry, hashed in produced.items(): + pyi_hashes.setdefault(entry, hashed) + + pyi_hashes_file.write_text(json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n") + + class PyiGenerator: """A .pyi file generator that will scan all defined Component in Reflex and generate the appropriate stub. 
""" - modules: list = [] - root: str = "" - current_module: Any = {} - written_files: list[tuple[str, str]] = [] + def __init__(self) -> None: + """Initialize per-instance scan state.""" + self.written_files: list[tuple[str, str]] = [] def _scan_files(self, files: list[Path]): max_workers = min(multiprocessing.cpu_count() or 1, len(files), 8) @@ -1673,7 +1730,7 @@ def scan_all( and target_path.suffix == ".py" and target_path.name not in EXCLUDED_FILES ): - file_targets.append(target_path) + file_targets.append(target_path.resolve()) continue if not target_path.is_dir(): continue @@ -1681,10 +1738,7 @@ def scan_all( relative = _relative_to_pwd(file_path) if relative.name in EXCLUDED_FILES or file_path.suffix != ".py": continue - if ( - changed_files is not None - and _relative_to_pwd(file_path) not in changed_files - ): + if changed_files is not None and relative not in changed_files: continue file_targets.append(file_path) @@ -1702,79 +1756,14 @@ def scan_all( self._scan_files(file_targets) - file_paths, hashes = ( - [f[0] for f in self.written_files], - [f[1] for f in self.written_files], - ) - # Fix generated pyi files with ruff. 
- if file_paths: - subprocess.run(["ruff", "format", *file_paths]) - subprocess.run(["ruff", "check", "--fix", *file_paths]) - - if use_json: - if file_paths and changed_files is None: - file_paths = list(map(Path, file_paths)) - top_dir = file_paths[0].parent - for file_path in file_paths: - file_parent = file_path.parent - while len(file_parent.parts) > len(top_dir.parts): - file_parent = file_parent.parent - while len(top_dir.parts) > len(file_parent.parts): - top_dir = top_dir.parent - while not file_parent.samefile(top_dir): - file_parent = file_parent.parent - top_dir = top_dir.parent - - while ( - not top_dir.samefile(top_dir.parent) - and not (top_dir / PYI_HASHES).exists() - ): - top_dir = top_dir.parent - - pyi_hashes_file = top_dir / PYI_HASHES - - if pyi_hashes_file.exists(): - pyi_hashes_file.write_text( - json.dumps( - dict( - zip( - [ - f.relative_to(pyi_hashes_file.parent).as_posix() - for f in file_paths - ], - hashes, - strict=True, - ) - ), - indent=2, - sort_keys=True, - ) - + "\n", - ) - elif file_paths: - file_paths = list(map(Path, file_paths)) - pyi_hashes_parent = file_paths[0].parent - while ( - not pyi_hashes_parent.samefile(pyi_hashes_parent.parent) - and not (pyi_hashes_parent / PYI_HASHES).exists() - ): - pyi_hashes_parent = pyi_hashes_parent.parent - - pyi_hashes_file = pyi_hashes_parent / PYI_HASHES - if pyi_hashes_file.exists(): - pyi_hashes = json.loads(pyi_hashes_file.read_text()) - for file_path, hashed_content in zip( - file_paths, hashes, strict=False - ): - formatted_path = file_path.relative_to( - pyi_hashes_parent - ).as_posix() - pyi_hashes[formatted_path] = hashed_content - - pyi_hashes_file.write_text( - json.dumps(pyi_hashes, indent=2, sort_keys=True) + "\n" - ) + if self.written_files: + written_paths = [p for p, _ in self.written_files] + subprocess.run(["ruff", "format", *written_paths]) + subprocess.run(["ruff", "check", "--fix", *written_paths]) + + if use_json and (self.written_files or file_targets): + 
_update_pyi_hashes_file(self.written_files, file_targets) if __name__ == "__main__": @@ -1787,10 +1776,18 @@ def scan_all( default=["reflex/components", "reflex/experimental", "reflex/__init__.py"], help="Target directories/files to process", ) + parser.add_argument( + "--no-update-hashes", + dest="update_hashes", + action="store_false", + help="Do not read or write the workspace pyi_hashes.json. " + "Use this from build hooks where parallel package builds would race " + "on a single shared hash file.", + ) args = parser.parse_args() logging.basicConfig(level=logging.INFO) logging.getLogger("blib2to3.pgen2.driver").setLevel(logging.INFO) gen = PyiGenerator() - gen.scan_all(args.targets, None, use_json=True) + gen.scan_all(args.targets, None, use_json=args.update_hashes) diff --git a/scripts/hatch_build.py b/scripts/hatch_build.py index 37d3a913707..1083ae6d1fe 100644 --- a/scripts/hatch_build.py +++ b/scripts/hatch_build.py @@ -41,7 +41,12 @@ def initialize(self, version: str, build_data: dict[str, Any]) -> None: file.unlink(missing_ok=True) subprocess.run( - [sys.executable, "-m", "reflex_base.utils.pyi_generator"], + [ + sys.executable, + "-m", + "reflex_base.utils.pyi_generator", + "--no-update-hashes", + ], check=True, ) self.marker().touch() diff --git a/scripts/make_pyi.py b/scripts/make_pyi.py index fc905a15036..3ea6984a1b2 100644 --- a/scripts/make_pyi.py +++ b/scripts/make_pyi.py @@ -1,17 +1,34 @@ -"""The pyi generator module.""" +"""The pyi generator module. +The last commit that touched ``pyi_hashes.json`` is used as the baseline of +"last successful regeneration". Sources changed since that commit (committed, +staged, or unstaged, but not untracked) drive an incremental run; the change set is +expanded along the import graph so modifying a parent class also regenerates +the stubs of every subclass that inherits from it. 
+ +A full regeneration is forced when ``pyi_hashes.json`` is absent, or when the +generator's own files (``scripts/make_pyi.py`` or the ``PyiGenerator`` +library) appear in the change set. +""" + +import ast import logging import subprocess import sys +from collections import defaultdict +from collections.abc import Iterable from pathlib import Path -from reflex_base.utils.pyi_generator import PyiGenerator, _relative_to_pwd +from reflex_base.utils.pyi_generator import PyiGenerator logger = logging.getLogger("pyi_generator") -LAST_RUN_COMMIT_SHA_FILE = Path(".pyi_generator_last_run").resolve() -GENERATOR_FILE = Path(__file__).resolve() -GENERATOR_DIFF_FILE = Path(".pyi_generator_diff").resolve() +PYI_HASHES = Path("pyi_hashes.json") +GENERATOR_PATHS = frozenset({ + "scripts/make_pyi.py", + "packages/reflex-base/src/reflex_base/utils/pyi_generator.py", +}) + DEFAULT_TARGETS = [ "reflex/components", "reflex/experimental", @@ -31,75 +48,223 @@ ] -def _git_diff(args: list[str]) -> str: - """Run a git diff command. +def _git(*args: str) -> list[str]: + """Run ``git`` with `args` and return non-empty stdout lines. Args: - args: The args to pass to git diff. + *args: Arguments forwarded to ``git``. + + Returns: + Non-empty lines of standard output, with trailing newlines stripped. + """ + result = subprocess.run(["git", *args], capture_output=True, text=True, check=False) + return [line for line in result.stdout.splitlines() if line] + + +def _last_regen_sha() -> str | None: + """Return the SHA of the last commit that touched ``pyi_hashes.json``. Returns: - The output of the git diff command. + The commit SHA, or ``None`` if the file is missing or has no history. 
""" - cmd = ["git", "diff", "--no-color", *args] - return subprocess.run(cmd, capture_output=True, encoding="utf-8").stdout + if not PYI_HASHES.exists(): + return None + out = _git("log", "-1", "--format=%H", "--", str(PYI_HASHES)) + return out[0] if out else None + +def _changed_python_paths(sha: str) -> set[str]: + """All ``.py`` paths changed since `sha`. -def _git_changed_files(args: list[str] | None = None) -> list[Path]: - """Get the list of changed files for a git diff command. + A single ``git diff `` covers committed, staged, and unstaged changes + (it diffs the working tree against the commit). Brand-new untracked files + aren't included; ``git add`` them first to bring them into scope. Args: - args: The args to pass to git diff. + sha: The baseline commit SHA. Returns: - The list of changed files. + Repo-relative paths of every ``.py`` file changed since `sha`. """ - if not args: - args = [] + return {p for p in _git("diff", "--name-only", sha) if p.endswith(".py")} + + +def _key(path: Path) -> str: + """POSIX-style repo-relative string key for `path`. - if "--name-only" not in args: - args.insert(0, "--name-only") + Args: + path: The absolute path to convert. - diff = _git_diff(args).splitlines() - return [Path(file.strip()) for file in diff] + Returns: + Repo-relative POSIX path string. + """ + return path.relative_to(Path.cwd()).as_posix() -def _get_changed_files() -> list[Path] | None: - """Get the list of changed files since the last run of the generator. +def _gather_sources(targets: list[str]) -> list[Path]: + """Resolve every ``.py`` file reachable from `targets`. + + Args: + targets: User-provided target list (files or directories). Returns: - The list of changed files, or None if all files should be regenerated. + Sorted list of absolute paths to ``.py`` files under `targets`. 
""" - try: - last_run_commit_sha = LAST_RUN_COMMIT_SHA_FILE.read_text().strip() - except FileNotFoundError: - logger.info( - "make_pyi.py last run could not be determined, regenerating all .pyi files" - ) - return None - changed_files = _git_changed_files([f"{last_run_commit_sha}..HEAD"]) - # get all unstaged changes - changed_files.extend(_git_changed_files()) - if _relative_to_pwd(GENERATOR_FILE) not in changed_files: - return changed_files - logger.info("make_pyi.py has changed, checking diff now") - diff = "".join(_git_diff([GENERATOR_FILE.as_posix()]).splitlines()[2:]) + seen: set[Path] = set() + for target in targets: + p = Path(target).resolve() + if p.is_file() and p.suffix == ".py": + seen.add(p) + elif p.is_dir(): + seen.update(p.rglob("*.py")) + return sorted(seen) + + +def _package_parts(path: Path) -> list[str]: + """Dotted parts of the package containing `path`. + + For ``pkg/foo/bar.py`` and for ``pkg/foo/__init__.py`` this returns + ``["pkg", "foo"]`` — i.e. the package the module participates in, not the + module itself. + + Args: + path: Absolute path to a ``.py`` file. + + Returns: + Package parts in import order (top-level first), or ``[]`` if `path` + is not inside a package. + """ + parts: list[str] = [] + parent = path.parent + while (parent / "__init__.py").exists() and parent != parent.parent: + parts.append(parent.name) + parent = parent.parent + return list(reversed(parts)) + + +def _module_aliases(path: Path) -> set[str]: + """Dotted module names that an ``import`` could resolve to `path`. + + Walks upward while parent directories contain ``__init__.py`` to recover + the top-level package. For ``__init__.py`` files, also emits the package + name on its own (``import pkg`` reaches ``pkg/__init__.py``). + + Args: + path: Absolute path to a ``.py`` file. + + Returns: + Set of dotted module names that could refer to `path`. 
+ """ + pkg = _package_parts(path) + if path.stem == "__init__": + full = ".".join([*pkg, "__init__"]) + aliases = {full} + if pkg: + aliases.add(".".join(pkg)) + return aliases + return {".".join([*pkg, path.stem])} if pkg else {path.stem} + + +def _iter_import_nodes( + nodes: Iterable[ast.AST], +) -> Iterable[ast.Import | ast.ImportFrom]: + """Yield import nodes reachable without entering function or class bodies. + + Imports live at module top level or inside ``if TYPE_CHECKING:`` / + ``try/except ImportError`` / ``with`` blocks. Walking function and class + bodies wastes time and never finds anything that shapes the import graph. + + Args: + nodes: AST nodes to scan (typically ``tree.body``). + + Yields: + Each ``ast.Import`` / ``ast.ImportFrom`` node encountered. + """ + for node in nodes: + if isinstance(node, (ast.Import, ast.ImportFrom)): + yield node + elif isinstance(node, ast.If): + yield from _iter_import_nodes(node.body) + yield from _iter_import_nodes(node.orelse) + elif isinstance(node, ast.Try): + yield from _iter_import_nodes(node.body) + yield from _iter_import_nodes(node.orelse) + yield from _iter_import_nodes(node.finalbody) + for handler in node.handlers: + yield from _iter_import_nodes(handler.body) + elif hasattr(ast, "TryStar") and isinstance(node, ast.TryStar): + yield from _iter_import_nodes(node.body) + for handler in node.handlers: + yield from _iter_import_nodes(handler.body) + elif isinstance(node, (ast.With, ast.AsyncWith)): + yield from _iter_import_nodes(node.body) + + +def _imports_in(path: Path) -> set[str]: + """Absolute module names imported by `path`. + + For ``from pkg import name`` we emit both ``pkg`` and ``pkg.name`` so the + graph captures dependencies on either the package or one of its submodules. + Relative imports (``from .base import X``, ``from ..util import Y``) are + resolved against `path`'s own package so they participate in the graph. + + Args: + path: Absolute path to a ``.py`` file. 
+ Returns: + Dotted module names referenced by imports in `path`. + """ try: - last_diff = GENERATOR_DIFF_FILE.read_text() - if diff != last_diff: - logger.info("make_pyi.py has changed, regenerating all .pyi files") - changed_files = None - else: - logger.info("make_pyi.py has not changed, only regenerating changed files") - except FileNotFoundError: - logger.info( - "make_pyi.py diff could not be determined, regenerating all .pyi files" - ) - changed_files = None + tree = ast.parse(path.read_bytes(), filename=str(path)) + except (OSError, SyntaxError): + return set() + imports: set[str] = set() + pkg = _package_parts(path) + for node in _iter_import_nodes(tree.body): + if isinstance(node, ast.Import): + imports.update(alias.name for alias in node.names) + continue + if node.level == 0: + if node.module: + imports.add(node.module) + imports.update(f"{node.module}.{alias.name}" for alias in node.names) + continue + if node.level > len(pkg): + continue + base = pkg[: len(pkg) - (node.level - 1)] + if not base: + continue + target = ".".join([*base, node.module]) if node.module else ".".join(base) + imports.add(target) + imports.update(f"{target}.{alias.name}" for alias in node.names) + return imports - GENERATOR_DIFF_FILE.write_text(diff) - return changed_files +def _expand_with_dependents(changed: set[Path], sources: list[Path]) -> set[Path]: + """Add every source that transitively imports a changed source. + + Args: + changed: Sources detected as directly modified. + sources: All sources reachable from the targets. + + Returns: + `changed` union all sources whose import graph reaches a changed source. 
+ """ + importers: dict[str, set[Path]] = defaultdict(set) + for src in sources: + for mod in _imports_in(src): + importers[mod].add(src) + + seen = set(changed) + queue = list(changed) + while queue: + current = queue.pop() + for alias in _module_aliases(current): + for dependent in importers.get(alias, ()): + if dependent not in seen: + seen.add(dependent) + queue.append(dependent) + return seen if __name__ == "__main__": @@ -111,8 +276,6 @@ def _get_changed_files() -> list[Path] | None: if len(sys.argv) > 1 else DEFAULT_TARGETS ) - - # Only include targets that have a prefix in the default target list targets = [ target for target in targets @@ -121,16 +284,39 @@ def _get_changed_files() -> list[Path] | None: logger.info(f"Running .pyi generator for {targets}") - changed_files = _get_changed_files() - if changed_files is None: - logger.info("Changed files could not be detected, regenerating all .pyi files") + sha = _last_regen_sha() + if sha is None: + if PYI_HASHES.exists(): + logger.warning( + f"{PYI_HASHES} exists locally but has no git history; " + "every run will full-regenerate until the file is committed." 
+ ) + else: + logger.info( + "No pyi_hashes.json baseline in git, regenerating all .pyi files" + ) + changed_files: list[Path] | None = None else: - logger.info(f"Detected changed files: {changed_files}") + changed = _changed_python_paths(sha) + if changed & GENERATOR_PATHS: + logger.info("Generator changed, regenerating all .pyi files") + changed_files = None + else: + sources = _gather_sources(targets) + sources_by_key = {_key(p): p for p in sources} + directly_changed = { + sources_by_key[p] for p in changed if p in sources_by_key + } + if not directly_changed: + logger.info("No source files changed since last regeneration") + changed_files = [] + else: + expanded = _expand_with_dependents(directly_changed, sources) + logger.info( + f"Detected {len(directly_changed)} direct change(s), " + f"{len(expanded)} after transitive expansion" + ) + changed_files = [Path(_key(p)) for p in expanded] gen = PyiGenerator() gen.scan_all(targets, changed_files, use_json=True) - - current_commit_sha = subprocess.run( - ["git", "rev-parse", "HEAD"], capture_output=True, encoding="utf-8" - ).stdout.strip() - LAST_RUN_COMMIT_SHA_FILE.write_text(current_commit_sha) diff --git a/tests/integration/test_tailwind.py b/tests/integration/test_tailwind.py index 071570ce44a..ac3e2bf18a5 100644 --- a/tests/integration/test_tailwind.py +++ b/tests/integration/test_tailwind.py @@ -42,7 +42,12 @@ def index(): id="p-content", ) - assets = Path(__file__).resolve().parent.parent / "assets" + # Anchor on cwd so the stylesheet lands where the compiler will look. + # AppHarness re-runs the app via importlib.reload, which preserves the + # module's original file location; on a pytest rerun with a fresh + # tmp_path, an anchor based on the module file would still point at the + # previous tmp_path while cwd has been updated by the harness. 
+ assets = Path.cwd() / "assets" assets.mkdir(exist_ok=True) stylesheet = assets / "test_styles.css" stylesheet.write_text(".external { color: rgba(0, 0, 255, 0.5) }") diff --git a/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py new file mode 100644 index 00000000000..4cbb33b515c --- /dev/null +++ b/tests/units/reflex_base/utils/pyi_generator/test_hash_merge.py @@ -0,0 +1,224 @@ +"""Tests for ``pyi_hashes.json`` merge behavior in ``PyiGenerator.scan_all``.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from reflex_base.utils.pyi_generator import PyiGenerator + + +def _write_hashes(path: Path, mapping: dict[str, str]) -> None: + path.write_text(json.dumps(mapping, indent=2, sort_keys=True) + "\n") + + +@pytest.fixture +def workspace(tmp_path: Path) -> Path: + """Lay out a fake workspace with a couple of source files. + + Args: + tmp_path: pytest-provided tmp directory to populate. + + Returns: + The workspace root. 
+ """ + pkg = tmp_path / "pkg" + pkg.mkdir() + (pkg / "foo.py").write_text("# placeholder\n") + (pkg / "bar.py").write_text("# placeholder\n") + other = tmp_path / "other" + other.mkdir() + (other / "baz.py").write_text("# placeholder\n") + return tmp_path + + +def test_partial_run_preserves_unrelated_entries(workspace, monkeypatch): + """Entries for files outside the run's scope are preserved.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + "other/baz.pyi": "BAZ", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "NEW_FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == { + "pkg/foo.pyi": "NEW_FOO", + "pkg/bar.pyi": "BAR", + "other/baz.pyi": "BAZ", + } + + +def test_scanned_file_with_no_output_drops_entry(workspace, monkeypatch): + """A file scanned this run that produces no stub has its hash entry removed.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + bar_pyi = (workspace / "pkg" / "bar.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(bar_pyi), "BAR_NEW")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py", "pkg/bar.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/bar.pyi": "BAR_NEW"} + + +def test_single_scanned_file_with_no_output_drops_entry(workspace, monkeypatch): + """Scanning one file that produces no stub still drops its old hash 
entry.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + def fake_scan(self, files): + return + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/bar.pyi": "BAR"} + + +def test_creates_hashes_file_when_missing(workspace, monkeypatch): + """If ``pyi_hashes.json`` doesn't exist, the merge creates it.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + assert not hashes_path.exists() + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + assert hashes_path.exists() + assert json.loads(hashes_path.read_text()) == {"pkg/foo.pyi": "FOO"} + + +def test_missing_source_file_drops_entry(workspace, monkeypatch): + """An entry whose source ``.py`` no longer exists is cleaned up.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "FOO", + "pkg/deleted.pyi": "STALE", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO_NEW")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=True) + + result = json.loads(hashes_path.read_text()) + assert result == {"pkg/foo.pyi": "FOO_NEW"} + + +def test_use_json_false_does_not_touch_hashes_file(workspace, monkeypatch): + """With ``use_json=False``, 
``pyi_hashes.json`` is neither read nor written. + + Build hooks rely on this so parallel workspace builds don't race on a + single shared hash file at the workspace root. + """ + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + original = b"\x00not even json\x00" + hashes_path.write_bytes(original) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all(["pkg/foo.py"], changed_files=None, use_json=False) + + assert hashes_path.read_bytes() == original + + +def test_incremental_run_merges_into_existing(workspace, monkeypatch): + """An incremental run (``changed_files`` set) merges new hashes into the existing file.""" + monkeypatch.chdir(workspace) + + hashes_path = workspace / "pyi_hashes.json" + _write_hashes( + hashes_path, + { + "pkg/foo.pyi": "OLD_FOO", + "pkg/bar.pyi": "BAR", + }, + ) + + foo_pyi = (workspace / "pkg" / "foo.py").with_suffix(".pyi").resolve() + + def fake_scan(self, files): + self.written_files.append((str(foo_pyi), "NEW_FOO")) + + monkeypatch.setattr(PyiGenerator, "_scan_files", fake_scan) + + gen = PyiGenerator() + gen.scan_all( + ["pkg/foo.py"], + changed_files=[Path("pkg/foo.py")], + use_json=True, + ) + + result = json.loads(hashes_path.read_text()) + assert result == { + "pkg/foo.pyi": "NEW_FOO", + "pkg/bar.pyi": "BAR", + }