diff --git a/chain_state.json b/chain_state.json new file mode 100644 index 00000000..18f6b11b --- /dev/null +++ b/chain_state.json @@ -0,0 +1,12 @@ +{ + "current_milestone_index": 1, + "current_plan_name": "sprint-1-foundation-schema-20260503-0201", + "last_state": "stalled", + "completed": [ + { + "label": "sprint-0-spike", + "plan": "sprint-0-spike-validate-store-20260503-0106", + "status": "done" + } + ] +} diff --git a/docs/sprint-0-spike-report.md b/docs/sprint-0-spike-report.md new file mode 100644 index 00000000..3c2c9140 --- /dev/null +++ b/docs/sprint-0-spike-report.md @@ -0,0 +1,21 @@ +# Sprint 0 Spike Report + +## What the spike validated + +The throwaway-branch spike validated the refined storage direction against real Arnold call sites without touching production megaplan consumers. The unchanged Arnold `create_epic()` and `edit_epic()` flows ran through a compatibility adapter backed by the spike `FileStore`, including the seeded `inbound_message_id` message row, the turn update branch, checklist seeding and mutation, sprint replace/upsert, and queue transitions through `_lock_in`, `_queue`, `_pend`, `_reorder`, and gapless normalization. The spike journal also recovered correctly in both relevant crash windows: a process death after `prepare.json` but before `.commit` left no visible writes, and a process death after the `.commit` marker replayed the committed transaction on reopen. + +For the file-mode reader proof, the spike round-tripped copied real fixture trees from `arnold-source/.megaplan/plans/sprint-6-images-second-opinion` and `arnold-source/.megaplan/plans/sprint-3-multi-epic`, and the rework pass reran the same disposable-copy check on `arnold-source/.megaplan/plans/sprint-1b-discord-resident` specifically because that fixture carries both `review.json` and `execution_batch_10.json` (plus `execution_batch_11.json` through `execution_batch_14.json`). Those copies preserved filenames, layout, and file bytes after a byte-preserving read/write pass. The unchanged narrow `auto.py` helper surface still worked on the disposable copies: `_resolve_plan_dir(...)`, `_last_history_step_result(...)` at line 197, `_read_execute_blocking_deviations(...)` at line 223, and `_get_review_marker(...)` in the same tight loop used near line 685 all returned stable values without touching the source fixture tree. This is narrow compatibility evidence only; it is not proof that the broader plan-tree reader surface is already safe to migrate. + +## Protocol friction found + +The refined protocol is close, but the live Arnold/editorial surface is not yet a direct drop-in match. The real edit path still performs body writes through `update_epic(body=...)` rather than a dedicated `update_body(...)` call. The create path still depends on `seed_checklist(...)` as a first-class store operation. Queue behavior is still expressed as per-sprint mutations plus normalization, not as a single live `set_sprint_queue(...)` write. The store also has to support `transaction(epic_id=None)` plus message and turn updates before an epic exists, because Arnold binds the inbound message and turn rows only after `create_epic()` succeeds. + +The high-batch `auto.py` recheck also reinforces that this spike only proved the current narrow reader loop, not idealized plan-tree semantics. 
The copied `sprint-1b-discord-resident` fixture includes `execution_batch_10.json` through `execution_batch_14.json`, and `_read_execute_blocking_deviations(...)` still completed safely on that copy, but the helper currently sorts batch filenames lexicographically rather than numerically; the ordering sketch in the appendix below makes the difference concrete. Sprint 1 should keep that current behavior in mind when deciding whether the eventual repository abstraction needs to preserve filename ordering exactly or should normalize batch selection behind a compatibility layer. + +The journal spike also showed that transaction framing cannot be treated as file renames alone. The event log needs transaction-scoped framing with a shared `tx_id`, `_tx_begin`, `_tx_commit`, and tolerant scanning that ignores incomplete tails. Without that, recovery can restore files while leaving `events.jsonl` in a logically torn state. + +## Changes to absorb in Sprint 1 + +Sprint 1 should keep the refined protocol, but land it behind a compatibility layer instead of forcing Arnold to change call shapes immediately. The production `Store` surface should preserve `transaction(epic_id=None)`, bridge `update_epic(body=...)` to `update_body(...)`, and either retain `seed_checklist(...)` or provide an explicit adapter-owned shim for it. Queue support should include the refined `set_sprint_queue(...)` primitive, but Sprint 1 still needs compatibility helpers for the live Arnold per-sprint queue workflow until the caller surface is migrated deliberately. + +On the file side, Sprint 1 should carry forward the transaction journal contract from the spike: `prepare.json`, `.commit`, recover-on-open, and framed `events.jsonl` writes that ignore incomplete transactions during replay; the journal usage sketch in the appendix below walks through that lifecycle. On the plan-tree side, Sprint 1 should treat the copied-fixture proof as evidence only for the narrow `auto.py` helper reads exercised here. Broader readers and writers across the rest of the `.megaplan/plans/*` surface remain explicitly deferred and need separate validation before they should be counted as compatible.
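+
+## Appendix: illustrative sketches
+
+To make the batch-ordering caveat concrete, the sketch below shows how lexicographic sorting interleaves high-numbered batches. It is illustrative only: the filenames mirror the copied fixture, and the sort keys are not the actual `auto.py` helper code.
+
+```python
+names = [f"execution_batch_{n}.json" for n in (1, 2, 10, 11, 14)]
+
+# Lexicographic order, matching the helper's current behavior: batch 10 sorts before batch 2.
+print(sorted(names))
+# ['execution_batch_1.json', 'execution_batch_10.json', 'execution_batch_11.json',
+#  'execution_batch_14.json', 'execution_batch_2.json']
+
+# Numeric order, the alternative a repository abstraction could normalize to.
+print(sorted(names, key=lambda name: int(name.rsplit("_", 1)[1].removesuffix(".json"))))
+# ['execution_batch_1.json', 'execution_batch_2.json', 'execution_batch_10.json',
+#  'execution_batch_11.json', 'execution_batch_14.json']
+```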
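+
+The journal contract can also be exercised end to end with the helpers landing in this change. The sketch below assumes the `megaplan._core.io` surface from this diff; the root path, plan name, and event payload are hypothetical stand-ins.
+
+```python
+from pathlib import Path
+
+from megaplan._core.io import (
+    journal_event_log,
+    journal_text_write,
+    prepare_journal_transaction,
+    read_committed_framed_json_records,
+    recover_journal,
+    write_journal_commit_marker,
+)
+
+root = Path(".megaplan")  # hypothetical runtime root
+plan_dir = root / "plans" / "demo-plan"
+
+write = journal_text_write(plan_dir / "state.json", "{}", tx_id="tx-1")
+events = journal_event_log(
+    plan_dir / "events.jsonl",
+    [{"event_type": "body_edit", "summary": "demo edit"}],
+)
+
+prepare_journal_transaction(root, "tx-1", writes=[write], event_logs=[events])
+# Crash window A: dying here, before the marker, leaves no visible writes;
+# recover_journal() discards the prepared transaction on reopen.
+write_journal_commit_marker(root, "tx-1")
+# Crash window B: dying here, after the marker, is safe; recover_journal()
+# replays the committed transaction idempotently on reopen.
+
+print(recover_journal(root))  # e.g. {'replayed': ['tx-1'], 'discarded': [], ...}
+
+# Only records from fully committed transactions are surfaced; torn tails and
+# uncommitted frames are skipped by the tolerant framed-record scanner.
+print(read_committed_framed_json_records(plan_dir / "events.jsonl"))
+```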
diff --git a/megaplan/_core/__init__.py b/megaplan/_core/__init__.py index 89df2826..590aa46f 100644 --- a/megaplan/_core/__init__.py +++ b/megaplan/_core/__init__.py @@ -9,10 +9,16 @@ # -- io.py: pure utilities, atomic I/O, paths, config ----------------------- from .io import ( + append_framed_json_records, + append_framed_json_transaction, artifact_path, + atomic_write_bytes, atomic_write_json, atomic_write_text, batch_artifact_path, + canonical_megaplan_root, + commit_journal_transaction, + committed_framed_json_transactions, collect_git_diff_patch, collect_git_diff_summary, compute_global_batches, @@ -20,10 +26,23 @@ config_dir, current_iteration_artifact, current_iteration_raw_artifact, + discard_uncommitted_journal_transaction, detect_available_agents, ensure_runtime_layout, + find_plan_dir, find_command, + framed_json_record_bytes, + fsync_dir, + fsync_file, get_effective, + has_any_plan_root, + journal_blob_promotion, + journal_bytes_write, + journal_commit_path, + journal_event_log, + journal_prepare_path, + journal_root, + journal_text_write, json_dump, list_batch_artifacts, load_config, @@ -31,14 +50,22 @@ megaplan_root, normalize_text, now_utc, + orphan_plans_root, + plan_search_roots, plans_root, + prepare_journal_transaction, read_json, + read_committed_framed_json_records, + recover_journal, + repo_storage_id, render_final_md, save_config, schemas_root, + scrub_stale_staging_files, sha256_file, sha256_text, slugify, + write_journal_commit_marker, ) # -- phase_runtime.py: centralized runtime policy ---------------------------- @@ -125,10 +152,16 @@ __all__ = [ # io + "append_framed_json_records", + "append_framed_json_transaction", "artifact_path", + "atomic_write_bytes", "atomic_write_json", "atomic_write_text", "batch_artifact_path", + "canonical_megaplan_root", + "commit_journal_transaction", + "committed_framed_json_transactions", "collect_git_diff_patch", "collect_git_diff_summary", "compute_global_batches", @@ -136,10 +169,23 @@ "config_dir", "current_iteration_artifact", "current_iteration_raw_artifact", + "discard_uncommitted_journal_transaction", "detect_available_agents", "ensure_runtime_layout", + "find_plan_dir", "find_command", + "framed_json_record_bytes", + "fsync_dir", + "fsync_file", "get_effective", + "has_any_plan_root", + "journal_blob_promotion", + "journal_bytes_write", + "journal_commit_path", + "journal_event_log", + "journal_prepare_path", + "journal_root", + "journal_text_write", "json_dump", "list_batch_artifacts", "load_config", @@ -147,14 +193,22 @@ "megaplan_root", "normalize_text", "now_utc", + "orphan_plans_root", + "plan_search_roots", "plans_root", + "prepare_journal_transaction", "read_json", + "read_committed_framed_json_records", + "recover_journal", + "repo_storage_id", "render_final_md", "save_config", "schemas_root", + "scrub_stale_staging_files", "sha256_file", "sha256_text", "slugify", + "write_journal_commit_marker", # phase_runtime "DEFAULT_NON_EXECUTE_TIMEOUT_CAP_SECONDS", "PHASE_RUNTIME_POLICY", diff --git a/megaplan/_core/io.py b/megaplan/_core/io.py index 20b29092..c63fe034 100644 --- a/megaplan/_core/io.py +++ b/megaplan/_core/io.py @@ -1,4 +1,4 @@ -"""Atomic I/O, JSON helpers, path resolution, and config management.""" +"""Atomic I/O, journaling helpers, path resolution, and config management.""" from __future__ import annotations @@ -6,9 +6,12 @@ import json import os import re +import struct import tempfile +import time +from base64 import b64decode, b64encode from pathlib import Path -from typing import Any +from typing 
import Any, Iterable, Mapping, Sequence from megaplan.schemas import SCHEMAS, strict_schema from megaplan.types import KNOWN_AGENTS @@ -109,12 +112,79 @@ def compute_global_batches(finalize_data: dict[str, Any]) -> list[list[str]]: # Atomic I/O # --------------------------------------------------------------------------- -def atomic_write_text(path: Path, content: str) -> None: +MAX_FRAMED_JSON_RECORD_BYTES = 1024 * 1024 + + +def _fsync_file_descriptor(fd: int) -> None: + os.fsync(fd) + + +def fsync_dir(path: Path) -> None: + directory = path if path.is_dir() else path.parent + directory.mkdir(parents=True, exist_ok=True) + fd = os.open(directory, os.O_RDONLY) + try: + _fsync_file_descriptor(fd) + finally: + os.close(fd) + + +def fsync_file(path: Path) -> None: + with path.open("rb") as handle: + _fsync_file_descriptor(handle.fileno()) + + +def _write_bytes_direct(path: Path, content: bytes) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("wb") as handle: + handle.write(content) + handle.flush() + _fsync_file_descriptor(handle.fileno()) + fsync_dir(path.parent) + + +def _restore_staged_payload(entry: Mapping[str, Any]) -> bytes: + payload = entry.get("content") + storage = entry.get("content_storage") + if not isinstance(payload, str): + raise ValueError("Prepared journal entry is missing inline content") + if storage == "text": + return payload.encode("utf-8") + if storage == "base64": + return b64decode(payload.encode("ascii")) + raise ValueError(f"Unsupported prepared content storage: {storage!r}") + + +def _serialize_inline_payload(content: bytes | str) -> tuple[str, str]: + if isinstance(content, str): + return ("text", content) + return ("base64", b64encode(content).decode("ascii")) + + +def _content_sha256(content: bytes | str) -> str: + if isinstance(content, str): + return sha256_text(content) + return "sha256:" + hashlib.sha256(content).hexdigest() + + +def _path_sha256(path: Path) -> str: + with path.open("rb") as handle: + return "sha256:" + hashlib.sha256(handle.read()).hexdigest() + + +def atomic_write_bytes(path: Path, content: bytes) -> None: path.parent.mkdir(parents=True, exist_ok=True) - with tempfile.NamedTemporaryFile("w", encoding="utf-8", dir=path.parent, delete=False) as handle: + with tempfile.NamedTemporaryFile("wb", dir=path.parent, delete=False) as handle: handle.write(content) + handle.flush() + _fsync_file_descriptor(handle.fileno()) temp_path = Path(handle.name) temp_path.replace(path) + fsync_dir(path.parent) + + +def atomic_write_text(path: Path, content: str) -> None: + atomic_write_bytes(path, content.encode("utf-8")) def atomic_write_json(path: Path, data: Any) -> None: @@ -125,6 +195,372 @@ def read_json(path: Path) -> Any: return json.loads(path.read_text(encoding="utf-8")) +def journal_root(root: Path) -> Path: + return root / "_journal" + + +def journal_prepare_path(root: Path, tx_id: str) -> Path: + return journal_root(root) / f"tx-{tx_id}.prepare.json" + + +def journal_commit_path(root: Path, tx_id: str) -> Path: + return journal_root(root) / f"tx-{tx_id}.commit" + + +def journal_text_write(path: Path, content: str, *, tx_id: str | None = None) -> dict[str, Any]: + storage, inline = _serialize_inline_payload(content) + temp_name = f".{path.name}.tx-{tx_id or 'pending'}.tmp" + return { + "target_path": str(path), + "temp_path": str(path.parent / temp_name), + "content_storage": storage, + "content": inline, + "content_sha256": _content_sha256(content), + "prior_content_sha256": _path_sha256(path) if path.exists() else 
None, + } + + +def journal_bytes_write(path: Path, content: bytes, *, tx_id: str | None = None) -> dict[str, Any]: + storage, inline = _serialize_inline_payload(content) + temp_name = f".{path.name}.tx-{tx_id or 'pending'}.tmp" + return { + "target_path": str(path), + "temp_path": str(path.parent / temp_name), + "content_storage": storage, + "content": inline, + "content_sha256": _content_sha256(content), + "prior_content_sha256": _path_sha256(path) if path.exists() else None, + } + + +def journal_event_log(path: Path, records: Sequence[Mapping[str, Any]]) -> dict[str, Any]: + return { + "path": str(path), + "records": [dict(record) for record in records], + } + + +def journal_blob_promotion( + blob_dir: Path, + content: bytes, + *, + extension: str, + metadata: Mapping[str, Any], +) -> dict[str, Any]: + storage, inline = _serialize_inline_payload(content) + normalized_ext = extension.lstrip(".") + return { + "blob_dir": str(blob_dir), + "staging_path": str(blob_dir / "data.staging"), + "final_path": str(blob_dir / f"data.{normalized_ext}"), + "meta_path": str(blob_dir / "meta.json"), + "content_storage": storage, + "content": inline, + "content_sha256": _content_sha256(content), + "metadata": dict(metadata), + } + + +def framed_json_record_bytes(record: Mapping[str, Any]) -> bytes: + payload = json.dumps(record, sort_keys=True, separators=(",", ":")).encode("utf-8") + if len(payload) > MAX_FRAMED_JSON_RECORD_BYTES: + raise ValueError( + f"Framed JSON record exceeds {MAX_FRAMED_JSON_RECORD_BYTES} bytes: {len(payload)}", + ) + return struct.pack(">I", len(payload)) + payload + b"\n" + + +def append_framed_json_records(path: Path, records: Sequence[Mapping[str, Any]]) -> None: + if not records: + return + path.parent.mkdir(parents=True, exist_ok=True) + should_fsync = any(record.get("event_type") == "_tx_commit" for record in records) + with path.open("ab", buffering=0) as handle: + for record in records: + handle.write(framed_json_record_bytes(record)) + if should_fsync: + _fsync_file_descriptor(handle.fileno()) + if should_fsync: + fsync_dir(path.parent) + + +def append_framed_json_transaction( + path: Path, + tx_id: str, + records: Sequence[Mapping[str, Any]], +) -> None: + framed_records: list[dict[str, Any]] = [{"tx_id": tx_id, "event_type": "_tx_begin"}] + for record in records: + normalized = dict(record) + record_tx_id = normalized.get("tx_id") + if record_tx_id is not None and record_tx_id != tx_id: + raise ValueError(f"Framed record tx_id mismatch: expected {tx_id!r}, got {record_tx_id!r}") + normalized["tx_id"] = tx_id + framed_records.append(normalized) + framed_records.append({"tx_id": tx_id, "event_type": "_tx_commit"}) + append_framed_json_records(path, framed_records) + + +def iter_framed_json_records(path: Path) -> Iterable[dict[str, Any]]: + if not path.exists(): + return [] + + def _iterator() -> Iterable[dict[str, Any]]: + with path.open("rb") as handle: + while True: + length_bytes = handle.read(4) + if not length_bytes: + return + if len(length_bytes) < 4: + return + (payload_len,) = struct.unpack(">I", length_bytes) + if payload_len > MAX_FRAMED_JSON_RECORD_BYTES: + return + payload = handle.read(payload_len) + if len(payload) < payload_len: + return + newline = handle.read(1) + if newline != b"\n": + return + try: + record = json.loads(payload.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError): + return + if isinstance(record, dict): + yield record + + return _iterator() + + +def committed_framed_json_transactions(path: Path) -> dict[str, 
list[dict[str, Any]]]: + committed: dict[str, list[dict[str, Any]]] = {} + pending: dict[str, list[dict[str, Any]]] = {} + for record in iter_framed_json_records(path): + tx_id = record.get("tx_id") + if not isinstance(tx_id, str) or not tx_id: + continue + event_type = record.get("event_type") + if event_type == "_tx_begin": + pending[tx_id] = [record] + continue + bucket = pending.get(tx_id) + if bucket is None: + continue + bucket.append(record) + if event_type == "_tx_commit": + committed[tx_id] = bucket + pending.pop(tx_id, None) + return committed + + +def read_committed_framed_json_records( + path: Path, + *, + include_markers: bool = False, +) -> list[dict[str, Any]]: + records: list[dict[str, Any]] = [] + for transaction in committed_framed_json_transactions(path).values(): + if include_markers: + records.extend(transaction) + continue + records.extend( + record + for record in transaction + if record.get("event_type") not in {"_tx_begin", "_tx_commit"} + ) + return records + + +def _stage_write_entry(entry: Mapping[str, Any]) -> None: + temp_path = Path(entry["temp_path"]) + _write_bytes_direct(temp_path, _restore_staged_payload(entry)) + + +def _stage_blob_entry(entry: Mapping[str, Any]) -> None: + staging_path = Path(entry["staging_path"]) + meta_path = Path(entry["meta_path"]) + staging_path.parent.mkdir(parents=True, exist_ok=True) + _write_bytes_direct(staging_path, _restore_staged_payload(entry)) + atomic_write_json(meta_path, entry.get("metadata", {})) + + +def prepare_journal_transaction( + root: Path, + tx_id: str, + *, + writes: Sequence[Mapping[str, Any]] = (), + event_logs: Sequence[Mapping[str, Any]] = (), + blobs: Sequence[Mapping[str, Any]] = (), +) -> Path: + normalized_writes = [dict(entry) for entry in writes] + normalized_event_logs = [dict(entry) for entry in event_logs] + normalized_blobs = [dict(entry) for entry in blobs] + for entry in normalized_writes: + target_path = Path(entry["target_path"]) + temp_path = entry.get("temp_path") + if not isinstance(temp_path, str) or ".tx-pending." 
in temp_path: + entry["temp_path"] = str(target_path.parent / f".{target_path.name}.tx-{tx_id}.tmp") + prepare_path = journal_prepare_path(root, tx_id) + payload = { + "tx_id": tx_id, + "prepared_at": now_utc(), + "writes": normalized_writes, + "event_logs": normalized_event_logs, + "blob_promotions": normalized_blobs, + } + atomic_write_json(prepare_path, payload) + for entry in normalized_writes: + _stage_write_entry(entry) + for entry in normalized_blobs: + _stage_blob_entry(entry) + return prepare_path + + +def write_journal_commit_marker(root: Path, tx_id: str) -> Path: + marker_path = journal_commit_path(root, tx_id) + _write_bytes_direct(marker_path, b"") + return marker_path + + +def _rename_with_fsync(src: Path, dest: Path) -> None: + dest.parent.mkdir(parents=True, exist_ok=True) + src.replace(dest) + fsync_dir(dest.parent) + + +def _apply_prepared_writes(payload: Mapping[str, Any]) -> None: + for entry in payload.get("writes", []): + target_path = Path(entry["target_path"]) + desired_sha = entry.get("content_sha256") + if target_path.exists() and desired_sha == _path_sha256(target_path): + temp_path = Path(entry["temp_path"]) + if temp_path.exists(): + temp_path.unlink() + continue + temp_path = Path(entry["temp_path"]) + if not temp_path.exists(): + _stage_write_entry(entry) + _rename_with_fsync(temp_path, target_path) + + +def _apply_prepared_blob_promotions(payload: Mapping[str, Any]) -> None: + for entry in payload.get("blob_promotions", []): + final_path = Path(entry["final_path"]) + desired_sha = entry.get("content_sha256") + atomic_write_json(Path(entry["meta_path"]), entry.get("metadata", {})) + if final_path.exists() and desired_sha == _path_sha256(final_path): + staging_path = Path(entry["staging_path"]) + if staging_path.exists(): + staging_path.unlink() + continue + staging_path = Path(entry["staging_path"]) + if not staging_path.exists(): + _stage_blob_entry(entry) + _rename_with_fsync(staging_path, final_path) + + +def _apply_prepared_event_logs(payload: Mapping[str, Any]) -> None: + tx_id = payload["tx_id"] + for entry in payload.get("event_logs", []): + log_path = Path(entry["path"]) + committed_ids = set(committed_framed_json_transactions(log_path)) + if tx_id in committed_ids: + continue + append_framed_json_transaction(log_path, tx_id, entry.get("records", [])) + + +def _cleanup_prepared_transaction(payload: Mapping[str, Any]) -> None: + for entry in payload.get("writes", []): + temp_path = Path(entry["temp_path"]) + if temp_path.exists(): + temp_path.unlink() + for entry in payload.get("blob_promotions", []): + staging_path = Path(entry["staging_path"]) + if staging_path.exists(): + staging_path.unlink() + prepare_path = journal_prepare_path(Path(payload["journal_root"]), payload["tx_id"]) + commit_path = journal_commit_path(Path(payload["journal_root"]), payload["tx_id"]) + if prepare_path.exists(): + prepare_path.unlink() + if commit_path.exists(): + commit_path.unlink() + fsync_dir(prepare_path.parent) + + +def commit_journal_transaction(root: Path, tx_id: str) -> None: + prepare_path = journal_prepare_path(root, tx_id) + payload = read_json(prepare_path) + if not isinstance(payload, dict): + raise ValueError(f"Malformed prepare payload at {prepare_path}") + payload["journal_root"] = str(root) + write_journal_commit_marker(root, tx_id) + _apply_prepared_writes(payload) + _apply_prepared_blob_promotions(payload) + _apply_prepared_event_logs(payload) + _cleanup_prepared_transaction(payload) + + +def discard_uncommitted_journal_transaction(root: Path, tx_id: 
str) -> None: + prepare_path = journal_prepare_path(root, tx_id) + if not prepare_path.exists(): + return + payload = read_json(prepare_path) + if not isinstance(payload, dict): + prepare_path.unlink() + fsync_dir(prepare_path.parent) + return + payload["journal_root"] = str(root) + _cleanup_prepared_transaction(payload) + + +def scrub_stale_staging_files(root: Path, *, older_than_seconds: int = 3600) -> list[Path]: + cutoff = time.time() - older_than_seconds + removed: list[Path] = [] + changed_dirs: set[Path] = set() + if not root.exists(): + return removed + for staging_path in root.rglob("*.staging"): + try: + if staging_path.stat().st_mtime > cutoff: + continue + except FileNotFoundError: + continue + staging_path.unlink(missing_ok=True) + removed.append(staging_path) + changed_dirs.add(staging_path.parent) + for directory in sorted(changed_dirs): + fsync_dir(directory) + return removed + + +def recover_journal(root: Path) -> dict[str, list[str]]: + journal_dir = journal_root(root) + result = {"replayed": [], "discarded": [], "scrubbed_staging": []} + result["scrubbed_staging"] = [str(path) for path in scrub_stale_staging_files(root)] + if not journal_dir.exists(): + return result + + for prepare_path in sorted(journal_dir.glob("tx-*.prepare.json")): + match = re.fullmatch(r"tx-(.+)\.prepare\.json", prepare_path.name) + if match is None: + continue + tx_id = match.group(1) + if journal_commit_path(root, tx_id).exists(): + commit_journal_transaction(root, tx_id) + result["replayed"].append(tx_id) + else: + discard_uncommitted_journal_transaction(root, tx_id) + result["discarded"].append(tx_id) + + for commit_path in sorted(journal_dir.glob("tx-*.commit")): + tx_id = commit_path.name[len("tx-") : -len(".commit")] + if not journal_prepare_path(root, tx_id).exists(): + commit_path.unlink() + fsync_dir(journal_dir) + + return result + + def load_finalize_snapshot(plan_dir: Path) -> dict[str, Any]: return read_json(plan_dir / "finalize_snapshot.json") @@ -320,6 +756,101 @@ def plans_root(root: Path) -> Path: return megaplan_root(root) / "plans" +def _git_common_dir(root: Path) -> Path | None: + git_path = root / ".git" + if git_path.is_dir(): + return git_path.resolve() + if not git_path.is_file(): + return None + try: + content = git_path.read_text(encoding="utf-8").strip() + except OSError: + return None + prefix = "gitdir:" + if not content.startswith(prefix): + return None + gitdir = Path(content[len(prefix):].strip()) + if not gitdir.is_absolute(): + gitdir = (root / gitdir).resolve() + if gitdir.parent.name == "worktrees": + return gitdir.parent.parent.resolve() + return gitdir.resolve() + + +def repo_storage_id(root: Path) -> str: + resolved = root.resolve() + common_dir = _git_common_dir(resolved) + identity_source = str(common_dir or resolved) + label_source = common_dir.parent.name if common_dir is not None else resolved.name + digest = hashlib.sha256(identity_source.encode("utf-8")).hexdigest()[:12] + return f"{slugify(label_source or 'repo')}-{digest}" + + +def canonical_megaplan_root(root: Path, *, home: Path | None = None) -> Path: + base_home = (home or Path.home()).expanduser().resolve() + return base_home / ".megaplan" / repo_storage_id(root) + + +def orphan_plans_root(root: Path, *, home: Path | None = None) -> Path: + return canonical_megaplan_root(root, home=home) / "orphan_plans" + + +def plan_search_roots(root: Path, *, home: Path | None = None) -> list[Path]: + """Return canonical and legacy plan roots for *root*. 
+ + Canonical orphan plans live under ``~/.megaplan/<repo-storage-id>/orphan_plans``. + Legacy plans remain in-place under ``<project>/.megaplan/plans`` until a later + migration step intentionally moves runtime writes across. + """ + + roots = [orphan_plans_root(root, home=home), plans_root(root)] + deduped: list[Path] = [] + seen: set[Path] = set() + for candidate in roots: + resolved = candidate.resolve(strict=False) + if resolved in seen: + continue + seen.add(resolved) + deduped.append(candidate) + return deduped + + +def has_any_plan_root(root: Path, *, home: Path | None = None) -> bool: + return any(candidate.is_dir() for candidate in plan_search_roots(root, home=home)) + + +def find_plan_dir(start: Path, requested_name: str, *, home: Path | None = None) -> Path | None: + """Find ``requested_name`` across canonical and legacy plan roots near *start*.""" + + resolved_start = start.resolve() + seen_project_roots: set[Path] = set() + + def _check(project_root: Path) -> Path | None: + resolved_project = project_root.resolve() + if resolved_project in seen_project_roots: + return None + seen_project_roots.add(resolved_project) + for candidate_root in plan_search_roots(resolved_project, home=home): + plan_dir = candidate_root / requested_name + if (plan_dir / "state.json").exists(): + return plan_dir + return None + + for project_root in (resolved_start, *resolved_start.parents): + plan_dir = _check(project_root) + if plan_dir is not None: + return plan_dir + + for megaplan_dir in sorted(resolved_start.rglob(".megaplan")): + if not megaplan_dir.is_dir(): + continue + plan_dir = _check(megaplan_dir.parent) + if plan_dir is not None: + return plan_dir + + return None + + def schemas_root(root: Path) -> Path: return megaplan_root(root) / "schemas" diff --git a/megaplan/_core/state.py b/megaplan/_core/state.py index 8d59beb8..2de46d46 100644 --- a/megaplan/_core/state.py +++ b/megaplan/_core/state.py @@ -26,7 +26,9 @@ atomic_write_json, atomic_write_text, current_iteration_raw_artifact, + find_plan_dir, now_utc, + plan_search_roots, plans_root, read_json, ) @@ -43,37 +45,22 @@ # --------------------------------------------------------------------------- def active_plan_dirs(root: Path) -> list[Path]: - if not plans_root(root).exists(): - return [] - directories: list[Path] = [] - for child in plans_root(root).iterdir(): - if child.is_dir() and (child / "state.json").exists(): - directories.append(child) - return sorted(directories) + by_name: dict[str, Path] = {} + for candidate_root in plan_search_roots(root): + if not candidate_root.exists(): + continue + for child in candidate_root.iterdir(): + if child.is_dir() and (child / "state.json").exists(): + by_name.setdefault(child.name, child) + return [by_name[name] for name in sorted(by_name)] def resolve_plan_dir(root: Path, requested_name: str | None) -> Path: plan_dirs = active_plan_dirs(root) if requested_name: - plan_dir = plans_root(root) / requested_name - if (plan_dir / "state.json").exists(): + plan_dir = find_plan_dir(root, requested_name) + if plan_dir is not None: return plan_dir - # Walk up parent directories — plan may live in an ancestor's .megaplan/ - current = root.resolve().parent - while True: - candidate = plans_root(current) / requested_name - if (candidate / "state.json").exists(): - return candidate - parent = current.parent - if parent == current: - break - current = parent - # Walk down child directories — plan may live in a subdirectory's .megaplan/ - for megaplan_dir in sorted(root.resolve().rglob(".megaplan")): - if megaplan_dir.is_dir(): -
candidate = megaplan_dir / "plans" / requested_name - if (candidate / "state.json").exists(): - return candidate raise CliError("missing_plan", f"Plan '{requested_name}' does not exist") if not plan_dirs: raise CliError("missing_plan", "No plans found. Run init first.") diff --git a/megaplan/auto.py b/megaplan/auto.py index 23458c9e..e9adafad 100644 --- a/megaplan/auto.py +++ b/megaplan/auto.py @@ -26,6 +26,7 @@ from pathlib import Path from typing import Any, Callable +from megaplan._core import find_plan_dir from megaplan.types import ( AUTOMATION_TERMINAL_STATES, STATE_AWAITING_HUMAN, @@ -179,19 +180,8 @@ def _phase_command(next_step: str) -> list[str]: def _resolve_plan_dir(plan: str, cwd: Path | None) -> Path | None: - """Best-effort resolution of ``.megaplan/plans/`` near ``cwd``. - - Walks up parents of ``cwd`` looking for a ``.megaplan/plans/`` - directory, matching how ``megaplan status`` resolves plans. Returns - ``None`` if the plan dir can't be located — callers should treat that - as "no review marker available" and fall back to the plain stall-count. - """ - base = (cwd or Path.cwd()).resolve() - for candidate in (base, *base.parents): - plan_dir = candidate / ".megaplan" / "plans" / plan - if (plan_dir / "state.json").exists(): - return plan_dir - return None + """Best-effort resolution of legacy or canonical orphan plan roots near ``cwd``.""" + return find_plan_dir(cwd or Path.cwd(), plan) def _last_history_step_result(plan_dir: Path | None, step: str) -> str | None: diff --git a/megaplan/cli.py b/megaplan/cli.py index 9339fb13..cba3d9d2 100644 --- a/megaplan/cli.py +++ b/megaplan/cli.py @@ -33,6 +33,7 @@ escalated_subsystems, ensure_runtime_layout, get_effective, + has_any_plan_root, infer_next_steps, is_prose_mode, json_dump, @@ -425,7 +426,7 @@ def _collect_megaplan_roots(root: Path, *, tree: bool = False, all_system: bool # Walk up to find parent .megaplan directories current = root.resolve().parent while True: - if (current / ".megaplan" / "plans").is_dir() and current.resolve() != root.resolve(): + if has_any_plan_root(current) and current.resolve() != root.resolve(): roots.append(current) parent = current.parent if parent == current: @@ -433,9 +434,9 @@ def _collect_megaplan_roots(root: Path, *, tree: bool = False, all_system: bool current = parent # Walk down to find child .megaplan directories for megaplan_dir in sorted(root.rglob(".megaplan")): - if megaplan_dir.is_dir() and (megaplan_dir / "plans").is_dir(): + if megaplan_dir.is_dir(): candidate = megaplan_dir.parent - if candidate.resolve() != root.resolve(): + if has_any_plan_root(candidate) and candidate.resolve() != root.resolve(): roots.append(candidate) return roots diff --git a/megaplan/schemas/__init__.py b/megaplan/schemas/__init__.py new file mode 100644 index 00000000..4564819f --- /dev/null +++ b/megaplan/schemas/__init__.py @@ -0,0 +1,75 @@ +"""Megaplan schema package. + +This package keeps the historical ``megaplan.schemas`` import surface stable +while opening a dedicated namespace for the Sprint 1 storage models. 
+""" + +from .models import ( + AutomationActor, + BotTurn, + ChecklistItem, + CodeArtifact, + Codebase, + ControlMessage, + Epic, + EpicEvent, + EpicLock, + ExecutionLease, + ExternalRequest, + Feedback, + HomeBackend, + Image, + Message, + MigrationRun, + Plan, + PlanArtifact, + ProgressEvent, + SecondOpinion, + Sprint, + SprintItem, + StorageModel, + SystemLog, + ToolCall, + utc_now, +) +from .runtime import ( + SCHEMAS, + STANCE_SCHEMA, + STOP_SIGNAL_SCHEMA, + get_execution_schema_key, + strict_schema, +) + +__all__ = [ + "AutomationActor", + "BotTurn", + "ChecklistItem", + "CodeArtifact", + "Codebase", + "ControlMessage", + "Epic", + "EpicEvent", + "EpicLock", + "ExecutionLease", + "ExternalRequest", + "Feedback", + "HomeBackend", + "Image", + "Message", + "MigrationRun", + "Plan", + "PlanArtifact", + "ProgressEvent", + "SecondOpinion", + "Sprint", + "SprintItem", + "SCHEMAS", + "STANCE_SCHEMA", + "STOP_SIGNAL_SCHEMA", + "StorageModel", + "SystemLog", + "ToolCall", + "get_execution_schema_key", + "strict_schema", + "utc_now", +] diff --git a/megaplan/schemas/arnold.py b/megaplan/schemas/arnold.py new file mode 100644 index 00000000..1800431e --- /dev/null +++ b/megaplan/schemas/arnold.py @@ -0,0 +1,340 @@ +"""Pydantic mirrors of the Arnold Supabase tables.""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Literal + +from pydantic import Field, field_validator, model_validator + +from .base import ( + HomeBackend, + NormalizedDict, + NormalizedList, + NormalizedStringList, + StorageModel, + utc_now, +) + +EpicState = Literal["shaping", "sprinting", "planned", "paused", "archived"] +BotTurnStatus = Literal["in_progress", "completed", "failed", "abandoned"] +MessageDirection = Literal["inbound", "outbound"] +ToolOperationKind = Literal["read", "write"] +SystemLogLevel = Literal["debug", "info", "warn", "error"] +SystemLogCategory = Literal["system", "application", "tool", "llm", "external_api", "recovery"] +ExternalRequestProvider = Literal["anthropic", "openai", "groq", "github", "discord", "supabase_storage"] +ExternalRequestStatus = Literal["pending", "sent", "confirmed", "failed", "orphaned"] +ImageSource = Literal["agent_generated", "user_uploaded", "caller_uploaded"] +ChecklistStatus = Literal["open", "done", "skipped", "superseded"] +ChecklistSource = Literal["bot_inferred", "user_requested", "carried_over", "default_seed", "second_opinion"] +EpicEventType = Literal[ + "body_edit", + "checklist_change", + "sprints_change", + "state_change", + "forced_handoff", + "created", + "code_referenced", + "codebase_added", + "image_generated", + "second_opinion_requested", + "reverted_to", + "sprint_status_change", +] +FeedbackKind = Literal[ + "style", + "process", + "epic_specific", + "friction", + "ambiguity", + "tool_failure", + "confusion", + "pattern_noticed", +] +FeedbackSource = Literal[ + "user_volunteered", + "agent_proposed_user_confirmed", + "explicit_save_request", + "agent_observation", +] +SprintStatus = Literal["proposed", "queued", "pending", "running", "done", "failed", "blocked", "cancelled"] +SprintItemComplexity = Literal["small", "medium", "large"] +SprintItemStatus = Literal["open", "in_progress", "done"] +SecondOpinionRequester = Literal["user", "auto_state_gate"] +CodebaseScope = Literal["global", "epic_specific"] +CodeArtifactKind = Literal["excerpt", "summary", "api_cache"] +CodeArtifactSource = Literal["conversation", "codebase"] +CodeArtifactScope = Literal["file", "directory", "cross_codebase"] + + +class 
Epic(StorageModel): + id: str + title: str + goal: str + body: str + state: EpicState + home_backend: HomeBackend = "file" + revision: int = 0 + created_at: datetime = Field(default_factory=utc_now) + last_edited_at: datetime = Field(default_factory=utc_now) + last_active_at: datetime | None = None + planned_at: datetime | None = None + + +class BotTurn(StorageModel): + id: str + epic_id: str | None = None + triggered_by_message_ids: NormalizedStringList = Field(default_factory=list) + prompt_snapshot: NormalizedDict | None = None + prompt_version: str | None = None + reasoning: str | None = None + final_output_message_id: str | None = None + status_message_id: str | None = None + status: BotTurnStatus + state_at_turn: NormalizedDict | None = None + plan_edited: bool = False + code_consulted: bool = False + image_generated: bool = False + second_opinion_requested: bool = False + message_sent: bool = False + warnings_issued: NormalizedList | None = None + current_activity: str | None = None + started_at: datetime = Field(default_factory=utc_now) + completed_at: datetime | None = None + model_version: str | None = None + + +class Message(StorageModel): + id: str + epic_id: str | None = None + direction: MessageDirection + content: str + sent_at: datetime = Field(default_factory=utc_now) + discord_message_id: str | None = None + has_code_attachment: bool = False + has_image_attachment: bool = False + in_burst_with: NormalizedStringList | None = None + was_voice_message: bool = False + audio_storage_url: str | None = None + transcription_metadata: NormalizedDict | None = None + bot_turn_id: str | None = None + + +class ToolCall(StorageModel): + id: str + turn_id: str + tool_name: str + operation_kind: ToolOperationKind + arguments: NormalizedDict = Field(default_factory=dict) + result: NormalizedDict = Field(default_factory=dict) + called_at: datetime = Field(default_factory=utc_now) + duration_ms: int = Field(default=0, ge=0) + + +class SystemLog(StorageModel): + id: str + level: SystemLogLevel + category: SystemLogCategory + event_type: str + message: str + details: NormalizedDict = Field(default_factory=dict) + turn_id: str | None = None + epic_id: str | None = None + occurred_at: datetime = Field(default_factory=utc_now) + + +class EpicLock(StorageModel): + epic_id: str + holder_id: str + acquired_at: datetime = Field(default_factory=utc_now) + expires_at: datetime + + +class ExternalRequest(StorageModel): + id: str + idempotency_key: str + provider: ExternalRequestProvider + endpoint: str + tool_call_id: str | None = None + turn_id: str | None = None + request_summary: NormalizedDict = Field(default_factory=dict) + request_body: NormalizedDict | None = None + status: ExternalRequestStatus + provider_request_id: str | None = None + provider_response_summary: NormalizedDict | None = None + attempt_count: int = Field(default=1, ge=1) + first_attempted_at: datetime = Field(default_factory=utc_now) + last_attempted_at: datetime = Field(default_factory=utc_now) + completed_at: datetime | None = None + error_details: NormalizedDict | None = None + + +class Image(StorageModel): + id: str + epic_id: str | None = None + source: ImageSource + prompt: str | None = None + storage_url: str + quality: str | None = None + size: str | None = None + created_at: datetime = Field(default_factory=utc_now) + reference_key: str + description: str | None = None + caption: str | None = None + in_body: bool = False + active: bool = True + discord_attachment_id: str | None = None + + +class 
ChecklistItem(StorageModel): + id: str + epic_id: str + content: str + status: ChecklistStatus | None = None + position: int = Field(gt=0) + source: ChecklistSource | None = None + skip_reason: str | None = None + superseded_by_item_id: str | None = None + created_at: datetime = Field(default_factory=utc_now) + completed_at: datetime | None = None + + +class EpicEvent(StorageModel): + id: str + epic_id: str + transaction_id: str + event_type: EpicEventType | None = None + summary: str + prior_state: NormalizedDict | None = None + turn_id: str | None = None + occurred_at: datetime = Field(default_factory=utc_now) + + +class Feedback(StorageModel): + id: str + kind: FeedbackKind + content: str + source: FeedbackSource + source_message_id: str | None = None + epic_id: str | None = None + turn_id: str | None = None + context_snapshot: NormalizedDict | None = None + active: bool = True + deactivation_reason: str | None = None + resolved: bool = False + resolution_note: str | None = None + resolved_at: datetime | None = None + created_at: datetime = Field(default_factory=utc_now) + last_referenced_at: datetime | None = None + last_applied_at: datetime | None = None + + @model_validator(mode="after") + def _validate_kind_source(self) -> Feedback: + user_kinds = {"style", "process", "epic_specific"} + user_sources = { + "user_volunteered", + "agent_proposed_user_confirmed", + "explicit_save_request", + } + observation_kinds = { + "friction", + "ambiguity", + "tool_failure", + "confusion", + "pattern_noticed", + } + if self.kind in user_kinds and self.source not in user_sources: + raise ValueError("user-facing feedback kinds require a user-confirmed source") + if self.kind in observation_kinds and self.source != "agent_observation": + raise ValueError("observation feedback kinds require source='agent_observation'") + return self + + +class Sprint(StorageModel): + id: str + epic_id: str + sprint_number: int = Field(gt=0) + name: str + goal: str + status: SprintStatus + revision: int = 0 + queue_position: int | None = Field(default=None, gt=0) + pending_reason: str | None = None + target_weeks: int = Field(default=2, gt=0) + created_at: datetime = Field(default_factory=utc_now) + updated_at: datetime = Field(default_factory=utc_now) + queued_at: datetime | None = None + + @model_validator(mode="after") + def _validate_status_fields(self) -> Sprint: + if self.status == "queued" and self.queue_position is None: + raise ValueError("queued sprints require queue_position") + if self.status == "pending" and not self.pending_reason: + raise ValueError("pending sprints require pending_reason") + if self.status != "queued" and self.queue_position is not None: + raise ValueError("queue_position is only valid for queued sprints") + return self + + +class SprintItem(StorageModel): + id: str + sprint_id: str + content: str + estimated_complexity: SprintItemComplexity + status: SprintItemStatus + source_section: str | None = None + position: int = Field(gt=0) + created_at: datetime = Field(default_factory=utc_now) + + +class SecondOpinion(StorageModel): + id: str + epic_id: str + requested_at: datetime = Field(default_factory=utc_now) + requested_by: SecondOpinionRequester + focus_areas: NormalizedStringList = Field(default_factory=list) + raw_response: str + score: int = Field(ge=0, le=10) + summary: str + verdict: str + resulting_checklist_item_ids: NormalizedStringList = Field(default_factory=list) + model_used: str + + +class Codebase(StorageModel): + id: str + owner: str + name: str + default_branch: str + 
scope: CodebaseScope = "global" + group_name: str | None = None + associated_epic_id: str | None = None + added_at: datetime = Field(default_factory=utc_now) + added_via: str = "manual" + last_accessed_at: datetime | None = None + verified_accessible_at: datetime | None = None + notes: str | None = None + + @field_validator("owner", "name") + @classmethod + def _require_lowercase_identifier(cls, value: str) -> str: + if not value or value != value.lower(): + raise ValueError("codebase owner and name must be non-empty lowercase strings") + return value + + +class CodeArtifact(StorageModel): + id: str + codebase_id: str | None = None + epic_id: str | None = None + kind: CodeArtifactKind + source: CodeArtifactSource + file_path: str | None = None + line_range: NormalizedDict | None = None + scope: CodeArtifactScope | None = None + content: str + content_summary: str | None = None + metadata: NormalizedDict = Field(default_factory=dict) + created_at: datetime = Field(default_factory=utc_now) + last_used_at: datetime | None = None + expires_at: datetime | None = None diff --git a/megaplan/schemas/base.py b/megaplan/schemas/base.py new file mode 100644 index 00000000..cfdc981c --- /dev/null +++ b/megaplan/schemas/base.py @@ -0,0 +1,49 @@ +"""Shared base types and helpers for Sprint 1 storage models.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from typing import Any, Annotated, Literal + +from pydantic import BaseModel, BeforeValidator, ConfigDict, model_validator + + +def utc_now() -> datetime: + return datetime.now(timezone.utc) + + +def _default_dict(value: Any) -> Any: + return {} if value is None else value + + +def _default_list(value: Any) -> Any: + if value is None: + return [] + if isinstance(value, (tuple, set)): + return list(value) + return value + + +NormalizedDict = Annotated[dict[str, Any], BeforeValidator(_default_dict)] +NormalizedList = Annotated[list[Any], BeforeValidator(_default_list)] +NormalizedStringList = Annotated[list[str], BeforeValidator(_default_list)] +HomeBackend = Literal["file", "db"] + + +class StorageModel(BaseModel): + """Strict base model for backend-facing storage records.""" + + model_config = ConfigDict(extra="forbid", populate_by_name=True, validate_assignment=True) + + @model_validator(mode="after") + def _normalize_datetimes(self) -> StorageModel: + for field_name in self.__class__.model_fields: + value = getattr(self, field_name, None) + if isinstance(value, datetime): + if value.tzinfo is None: + normalized = value.replace(tzinfo=timezone.utc) + else: + normalized = value.astimezone(timezone.utc) + if normalized != value: + object.__setattr__(self, field_name, normalized) + return self diff --git a/megaplan/schemas/models.py b/megaplan/schemas/models.py new file mode 100644 index 00000000..81cf4225 --- /dev/null +++ b/megaplan/schemas/models.py @@ -0,0 +1,61 @@ +"""Aggregated Sprint 1 storage model exports.""" + +from __future__ import annotations + +from .arnold import ( + BotTurn, + ChecklistItem, + CodeArtifact, + Codebase, + Epic, + EpicEvent, + EpicLock, + ExternalRequest, + Feedback, + Image, + Message, + SecondOpinion, + Sprint, + SprintItem, + SystemLog, + ToolCall, +) +from .base import HomeBackend, StorageModel, utc_now +from .sprint1 import ( + AutomationActor, + ControlMessage, + ExecutionLease, + MigrationRun, + Plan, + PlanArtifact, + ProgressEvent, +) + +__all__ = [ + "AutomationActor", + "BotTurn", + "ChecklistItem", + "CodeArtifact", + "Codebase", + "ControlMessage", + "Epic", + "EpicEvent", + 
"EpicLock", + "ExecutionLease", + "ExternalRequest", + "Feedback", + "HomeBackend", + "Image", + "Message", + "MigrationRun", + "Plan", + "PlanArtifact", + "ProgressEvent", + "SecondOpinion", + "Sprint", + "SprintItem", + "StorageModel", + "SystemLog", + "ToolCall", + "utc_now", +] diff --git a/megaplan/schemas.py b/megaplan/schemas/runtime.py similarity index 100% rename from megaplan/schemas.py rename to megaplan/schemas/runtime.py diff --git a/megaplan/schemas/sprint1.py b/megaplan/schemas/sprint1.py new file mode 100644 index 00000000..e112877a --- /dev/null +++ b/megaplan/schemas/sprint1.py @@ -0,0 +1,255 @@ +"""Sprint 1 storage extensions and Plan compatibility models.""" + +from __future__ import annotations + +from copy import deepcopy +from datetime import datetime +from typing import Any, Literal, cast + +from pydantic import Field + +from megaplan.types import ( + ActiveStep, + ClarificationRecord, + HistoryEntry, + LastGateRecord, + PlanConfig, + PlanMeta, + PlanState, + PlanVersionRecord, + SessionInfo, +) + +from .base import HomeBackend, NormalizedDict, NormalizedList, NormalizedStringList, StorageModel, utc_now + +MigrationPhase = Literal[ + "planning", + "copying_meta", + "copying_blobs", + "verifying", + "cutting_over", + "tombstoning", + "complete", + "aborted", +] +PlanArtifactKind = Literal["markdown", "json", "jsonl", "raw_text", "lock", "derived"] +PlanArtifactRole = Literal[ + "plan_version", + "plan_meta", + "critique", + "gate", + "gate_signals", + "finalize", + "finalize_snapshot", + "execution_batch", + "execution", + "execution_audit", + "execution_checkpoint", + "execution_trace", + "faults", + "receipt", + "review", + "raw_worker_output", + "template", + "derived_final", + "prep", + "research", + "directors_notes", + "human_verifications", + "tiebreaker_decisions", + "tiebreaker_payload", +] +WorkerKind = Literal["local_cli", "cloud_worker", "auto_driver"] +ControlIntent = Literal[ + "run_sprint", + "pause_plan", + "resume_plan", + "approve_gate", + "reject_gate", + "cancel_run", + "manual_fix", + "request_inspect", +] +ProgressEventKind = Literal[ + "phase_start", + "phase_end", + "batch_complete", + "gate_pending", + "gate_resolved", + "plan_done", + "plan_failed", + "execution_blocked", + "manual_fix_attached", +] +AutomationActorKind = Literal["cli", "cloud_worker", "ci", "admin"] + + +class MigrationRun(StorageModel): + id: str + epic_id: str + source_backend: HomeBackend + target_backend: HomeBackend + phase: MigrationPhase + manifest: NormalizedDict = Field(default_factory=dict) + copied_ids: NormalizedDict = Field(default_factory=dict) + blob_copy_progress: NormalizedDict = Field(default_factory=dict) + started_at: datetime = Field(default_factory=utc_now) + updated_at: datetime = Field(default_factory=utc_now) + completed_at: datetime | None = None + holder_id: str + expires_at: datetime + + +class ExecutionLease(StorageModel): + plan_id: str + epic_id: str | None = None + holder_id: str + phase: str + worker_kind: WorkerKind + acquired_at: datetime = Field(default_factory=utc_now) + heartbeat_at: datetime = Field(default_factory=utc_now) + expires_at: datetime + + +class PlanArtifact(StorageModel): + name: str + kind: PlanArtifactKind + role: PlanArtifactRole + version: int | None = None + batch: int | None = None + phase: str | None = None + content_text: str | None = None + content_json: dict[str, Any] | list[Any] | None = None + sha256: str + created_at: datetime = Field(default_factory=utc_now) + updated_at: datetime = 
Field(default_factory=utc_now) + + +class ControlMessage(StorageModel): + id: str + epic_id: str + actor_id: str + intent: ControlIntent + target_id: str + payload: NormalizedDict = Field(default_factory=dict) + idempotency_key: str + created_at: datetime = Field(default_factory=utc_now) + processor_id: str | None = None + claimed_at: datetime | None = None + processed_at: datetime | None = None + result: NormalizedDict | None = None + + +class ProgressEvent(StorageModel): + id: str + epic_id: str + plan_id: str | None = None + sprint_id: str | None = None + kind: ProgressEventKind + summary: str + details: NormalizedDict = Field(default_factory=dict) + occurred_at: datetime = Field(default_factory=utc_now) + + +class AutomationActor(StorageModel): + id: str + name: str + granted_epic_ids: Literal["*"] | NormalizedStringList + actor_kind: AutomationActorKind + created_at: datetime = Field(default_factory=utc_now) + last_active_at: datetime | None = None + + +class Plan(StorageModel): + id: str + name: str + epic_id: str | None = None + sprint_id: str | None = None + revision: int + idea: str + current_state: str + iteration: int + config: dict[str, Any] + sessions: dict[str, dict[str, Any]] + plan_versions: list[dict[str, Any]] + history: list[dict[str, Any]] + meta: dict[str, Any] + last_gate: dict[str, Any] + active_step: dict[str, Any] | None = None + clarification: dict[str, Any] | None = None + latest_finalize: dict[str, Any] | None = None + latest_review: dict[str, Any] | None = None + latest_execution: dict[str, Any] | None = None + latest_failure: dict[str, Any] | None = None + artifacts: list[PlanArtifact] = Field(default_factory=list) + created_at: datetime + updated_at: datetime + + @classmethod + def from_plan_state( + cls, + state: PlanState | dict[str, Any], + *, + plan_id: str | None = None, + epic_id: str | None = None, + sprint_id: str | None = None, + revision: int = 0, + artifacts: list[PlanArtifact] | None = None, + latest_finalize: dict[str, Any] | None = None, + latest_review: dict[str, Any] | None = None, + latest_execution: dict[str, Any] | None = None, + latest_failure: dict[str, Any] | None = None, + updated_at: datetime | None = None, + ) -> Plan: + raw = deepcopy(dict(state)) + created_at = raw["created_at"] + plan = cls( + id=plan_id or raw["name"], + name=raw["name"], + epic_id=epic_id, + sprint_id=sprint_id, + revision=revision, + idea=raw["idea"], + current_state=raw["current_state"], + iteration=raw["iteration"], + config=raw["config"], + sessions=raw["sessions"], + plan_versions=raw["plan_versions"], + history=raw["history"], + meta=raw["meta"], + last_gate=raw["last_gate"], + active_step=raw.get("active_step"), + clarification=raw.get("clarification"), + latest_finalize=latest_finalize, + latest_review=latest_review, + latest_execution=latest_execution, + latest_failure=latest_failure, + artifacts=artifacts or [], + created_at=created_at, + updated_at=updated_at or created_at, + ) + return plan + + @classmethod + def from_state(cls, state: PlanState | dict[str, Any], **kwargs: Any) -> Plan: + return cls.from_plan_state(state, **kwargs) + + def to_plan_state(self) -> PlanState: + state: PlanState = { + "name": self.name, + "idea": self.idea, + "current_state": self.current_state, + "iteration": self.iteration, + "created_at": self.created_at.isoformat().replace("+00:00", "Z"), + "config": cast(PlanConfig, deepcopy(self.config)), + "sessions": cast(dict[str, SessionInfo], deepcopy(self.sessions)), + "plan_versions": cast(list[PlanVersionRecord], 
deepcopy(self.plan_versions)), + "history": cast(list[HistoryEntry], deepcopy(self.history)), + "meta": cast(PlanMeta, deepcopy(self.meta)), + "last_gate": cast(LastGateRecord, deepcopy(self.last_gate)), + } + if self.active_step is not None: + state["active_step"] = cast(ActiveStep, deepcopy(self.active_step)) + if self.clarification is not None: + state["clarification"] = cast(ClarificationRecord, deepcopy(self.clarification)) + return state diff --git a/megaplan/store/__init__.py b/megaplan/store/__init__.py new file mode 100644 index 00000000..2032a4e3 --- /dev/null +++ b/megaplan/store/__init__.py @@ -0,0 +1,58 @@ +"""Storage package seams for the Sprint 1 backend refactor.""" + +from .base import ( + ArtifactRef, + ArtifactStat, + Backend, + ChecklistItemInput, + ControlMessageInput, + EpicSummary, + HotContext, + Lease, + LeaseConflict, + LockConflict, + MessageSearchHit, + ProgressEventInput, + RevisionConflict, + SprintItemInput, + SprintWithItems, + Store, + StoreError, + Transaction, +) +from .blob import BlobMissingError, BlobRef, BlobStat, BlobStore, LocalDirBlobStore +from .compat import ArnoldBlobAdapter, ArnoldStoreAdapter +from .db import DBStore +from .file import FileStore +from .plan_repository import PlanRepository + +__all__ = [ + "ArnoldBlobAdapter", + "ArnoldStoreAdapter", + "ArtifactRef", + "ArtifactStat", + "Backend", + "BlobMissingError", + "BlobRef", + "BlobStat", + "BlobStore", + "ChecklistItemInput", + "ControlMessageInput", + "DBStore", + "EpicSummary", + "FileStore", + "HotContext", + "Lease", + "LeaseConflict", + "LockConflict", + "LocalDirBlobStore", + "MessageSearchHit", + "PlanRepository", + "ProgressEventInput", + "RevisionConflict", + "SprintItemInput", + "SprintWithItems", + "Store", + "StoreError", + "Transaction", +] diff --git a/megaplan/store/base.py b/megaplan/store/base.py new file mode 100644 index 00000000..9b432c71 --- /dev/null +++ b/megaplan/store/base.py @@ -0,0 +1,900 @@ +"""Core storage contracts, record shapes, and compatibility helpers.""" + +from __future__ import annotations + +from contextlib import AbstractContextManager +from datetime import datetime +from types import TracebackType +from typing import Any, Literal, Mapping, Protocol, Sequence, TypeAlias, runtime_checkable + +from pydantic import Field + +from megaplan.schemas import ( + AutomationActor, + BotTurn, + ChecklistItem, + CodeArtifact, + Codebase, + ControlMessage, + Epic, + EpicEvent, + EpicLock, + ExecutionLease, + ExternalRequest, + Feedback, + Image, + Message, + Plan, + ProgressEvent, + SecondOpinion, + Sprint, + SprintItem, + StorageModel, + SystemLog, + ToolCall, +) +from megaplan.schemas.base import NormalizedDict, utc_now + +Backend = Literal["file", "db"] +JSONDict: TypeAlias = dict[str, Any] + + +class StoreError(RuntimeError): + """Base exception for store-contract failures.""" + + +class RevisionConflict(StoreError): + """Raised when an optimistic-concurrency write sees a stale revision.""" + + +class LockConflict(StoreError): + """Raised when an epic lock is held by another actor.""" + + +class LeaseConflict(StoreError): + """Raised when an execution lease is already held.""" + + +class ChecklistItemInput(StorageModel): + id: str | None = None + content: str + status: str = "open" + position: int | None = Field(default=None, gt=0) + source: str = "bot_inferred" + skip_reason: str | None = None + superseded_by_item_id: str | None = None + created_at: datetime | None = None + completed_at: datetime | None = None + + +class SprintItemInput(StorageModel): + 
id: str | None = None + content: str + estimated_complexity: str = "medium" + status: str = "open" + source_section: str | None = None + position: int | None = Field(default=None, gt=0) + created_at: datetime | None = None + + +class EpicSummary(Epic): + snippet: str | None = None + rank: float | int | None = None + + +class MessageSearchHit(Message): + snippet: str | None = None + rank: float | int | None = None + + +class SprintWithItems(Sprint): + items: list[SprintItem] = Field(default_factory=list) + + +class HotContext(StorageModel): + epic: Epic | None = None + recent_messages: list[Message] = Field(default_factory=list) + recent_tool_calls: list[ToolCall] = Field(default_factory=list) + active_feedback: list[Feedback] = Field(default_factory=list) + unresolved_observations: list[Feedback] = Field(default_factory=list) + sprints: list[SprintWithItems] = Field(default_factory=list) + codebases: list[Codebase] = Field(default_factory=list) + recent_code_artifacts: list[CodeArtifact] = Field(default_factory=list) + active_images: list[Image] = Field(default_factory=list) + recent_second_opinions: list[SecondOpinion] = Field(default_factory=list) + all_sprints_pending_no_queued: bool = False + + +class ArtifactRef(StorageModel): + plan_id: str + name: str + kind: str | None = None + role: str | None = None + size_bytes: int | None = None + sha256: str | None = None + updated_at: datetime | None = None + + +class ArtifactStat(StorageModel): + plan_id: str + name: str + size_bytes: int + sha256: str | None = None + updated_at: datetime = Field(default_factory=utc_now) + + +class ControlMessageInput(StorageModel): + epic_id: str + actor_id: str + intent: str + target_id: str + payload: NormalizedDict = Field(default_factory=dict) + idempotency_key: str + + +class ProgressEventInput(StorageModel): + epic_id: str + plan_id: str | None = None + sprint_id: str | None = None + kind: str + summary: str + details: NormalizedDict = Field(default_factory=dict) + + +Lease = ExecutionLease + + +@runtime_checkable +class Transaction(Protocol): + """Context-manager shape used by Store.transaction().""" + + def __enter__(self) -> Transaction: + ... + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> bool | None: + ... + + +@runtime_checkable +class Store(Protocol): + """Canonical Sprint 1 storage contract. + + The interface follows the refined design-doc surface while preserving the + live Arnold caller API that still drives editorial operations today. + """ + + # ---------- Transaction ---------- + def transaction(self, epic_id: str | None = None) -> AbstractContextManager[Transaction]: + ... + + # ---------- Epic ---------- + def create_epic( + self, + *, + title: str, + goal: str, + body: str, + state: str = "shaping", + home_backend: Backend = "file", + ) -> Epic: + ... + + def load_epic(self, epic_id: str) -> Epic | None: + ... + + def update_epic( + self, + epic_id: str, + *, + expected_revision: int | None = None, + **changes: Any, + ) -> Epic: + ... + + def list_epics( + self, + *, + active_only: bool = True, + limit: int = 50, + home_backend: Backend | None = None, + ) -> list[EpicSummary]: + ... + + def search_epics( + self, + *, + query: str, + active_only: bool = True, + limit: int = 20, + ) -> list[EpicSummary]: + ... + + # ---------- Body ---------- + def load_body(self, epic_id: str) -> str: + ... + + def update_body(self, epic_id: str, body: str, *, expected_revision: int) -> Epic: + ... 
+ + # ---------- Checklist ---------- + def seed_checklist(self, epic_id: str, items: Sequence[str]) -> list[ChecklistItem]: + ... + + def list_checklist_items( + self, + epic_id: str, + *, + status: str | None = None, + ) -> list[ChecklistItem]: + ... + + def add_checklist_items( + self, + epic_id: str, + items: Sequence[ChecklistItemInput], + ) -> list[ChecklistItem]: + ... + + def update_checklist_item(self, item_id: str, **changes: Any) -> ChecklistItem: + ... + + def delete_checklist_items(self, item_ids: Sequence[str]) -> None: + ... + + def replace_checklist( + self, + epic_id: str, + items: Sequence[ChecklistItemInput], + ) -> list[ChecklistItem]: + ... + + # ---------- Sprints ---------- + def create_sprint( + self, + *, + epic_id: str, + sprint_number: int, + name: str, + goal: str, + status: str = "proposed", + queue_position: int | None = None, + pending_reason: str | None = None, + target_weeks: int = 2, + ) -> Sprint: + ... + + def load_sprint(self, sprint_id: str) -> Sprint | None: + ... + + def list_sprints( + self, + epic_id: str, + *, + status: str | None = None, + ) -> list[Sprint]: + ... + + def list_sprints_with_items(self, epic_id: str) -> list[SprintWithItems]: + ... + + def update_sprint( + self, + sprint_id: str, + *, + expected_revision: int | None = None, + **changes: Any, + ) -> Sprint: + ... + + def delete_sprint(self, sprint_id: str) -> None: + ... + + def replace_sprint_items( + self, + sprint_id: str, + items: Sequence[SprintItemInput], + ) -> list[SprintItem]: + ... + + def list_sprint_items(self, sprint_id: str) -> list[SprintItem]: + ... + + def set_sprint_queue( + self, + epic_id: str, + ordered_sprint_ids: Sequence[str], + pending: Mapping[str, str], + ) -> list[Sprint]: + ... + + # ---------- Events ---------- + def record_epic_event( + self, + *, + epic_id: str, + transaction_id: str, + event_type: str, + summary: str, + prior_state: JSONDict | None, + turn_id: str | None, + ) -> EpicEvent: + ... + + def list_epic_events( + self, + epic_id: str, + *, + since: str | None = None, + until: str | None = None, + kinds: Sequence[str] | None = None, + limit: int | None = None, + ) -> list[EpicEvent]: + ... + + def latest_transaction_id(self, epic_id: str) -> str | None: + ... + + def events_by_transaction(self, transaction_id: str) -> list[EpicEvent]: + ... + + # ---------- Messages / turns ---------- + def create_message( + self, + *, + epic_id: str | None, + direction: str, + content: str, + discord_message_id: str | None = None, + bot_turn_id: str | None = None, + has_code_attachment: bool = False, + has_image_attachment: bool = False, + in_burst_with: Sequence[str] | None = None, + was_voice_message: bool = False, + audio_storage_url: str | None = None, + transcription_metadata: JSONDict | None = None, + synthesize_outbound_id: bool = True, + ) -> Message: + ... + + def load_message(self, message_id: str) -> Message | None: + ... + + def load_messages(self, message_ids: Sequence[str]) -> list[Message]: + ... + + def update_message(self, message_id: str, **changes: Any) -> Message: + ... + + def latest_outbound_message(self, *, epic_id: str | None = None) -> Message | None: + ... + + def create_turn( + self, + *, + epic_id: str | None, + triggered_by_message_ids: Sequence[str], + prompt_snapshot: JSONDict | None = None, + prompt_version: str | None = None, + state_at_turn: JSONDict | None = None, + model_version: str | None = None, + ) -> BotTurn: + ... + + def update_turn(self, turn_id: str, **changes: Any) -> BotTurn: + ... 
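`set_sprint_queue(...)` is the single-write replacement for per-sprint queue mutation plus normalization: sprints listed in `ordered_sprint_ids` become `queued` with gapless 1-based positions, sprints in the `pending` mapping become `pending` with an explicit reason, and everything else falls out of the queue. A minimal sketch against a scratch store root (the `/tmp` path is assumed):

```python
from megaplan.store import FileStore

store = FileStore("/tmp/megaplan-demo")  # assumed scratch root
epic = store.create_epic(title="Queue demo", goal="exercise the queue", body="# Demo")
first = store.create_sprint(epic_id=epic.id, sprint_number=1, name="One", goal="g1")
second = store.create_sprint(epic_id=epic.id, sprint_number=2, name="Two", goal="g2")
parked = store.create_sprint(epic_id=epic.id, sprint_number=3, name="Three", goal="g3")

# One write establishes the whole queue: second runs first, then first,
# while parked sits pending with a recorded reason.
for sprint in store.set_sprint_queue(
    epic.id,
    [second.id, first.id],
    {parked.id: "blocked on schema review"},
):
    print(sprint.sprint_number, sprint.status, sprint.queue_position, sprint.pending_reason)
```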
+ + def find_abandoned_turns(self, older_than_seconds: int) -> list[BotTurn]: + ... + + def list_recent_turns( + self, + *, + n: int = 10, + epic_id: str | None = None, + ) -> list[BotTurn]: + ... + + def search_messages( + self, + *, + query: str, + epic_id: str | None = None, + limit: int = 20, + ) -> list[MessageSearchHit]: + ... + + def record_tool_call( + self, + *, + turn_id: str, + tool_name: str, + operation_kind: str, + arguments: JSONDict, + result: JSONDict, + duration_ms: int, + ) -> ToolCall: + ... + + def search_tool_calls_by( + self, + *, + tool_name: str | None = None, + epic_id: str | None = None, + since: str | None = None, + limit: int = 20, + ) -> list[ToolCall]: + ... + + def log_system_event( + self, + *, + level: str, + category: str, + event_type: str, + message: str, + details: JSONDict | None = None, + turn_id: str | None = None, + epic_id: str | None = None, + ) -> SystemLog: + ... + + def load_hot_context(self, epic_id: str | None) -> HotContext: + ... + + def find_unprocessed_messages( + self, + epic_id: str, + started_at: str, + exclude_ids: Sequence[str], + ) -> list[Message]: + ... + + # ---------- External request ledger ---------- + def insert_pending( + self, + *, + idempotency_key: str, + provider: str, + endpoint: str, + request_summary: JSONDict, + request_body: JSONDict | None = None, + turn_id: str | None = None, + tool_call_id: str | None = None, + ) -> ExternalRequest: + ... + + def mark_confirmed( + self, + request_id: str, + *, + provider_request_id: str | None = None, + provider_response_summary: JSONDict | None = None, + ) -> ExternalRequest: + ... + + def mark_failed( + self, + request_id: str, + *, + error_details: JSONDict, + ) -> ExternalRequest: + ... + + def find_pending_external_requests(self, older_than_seconds: int) -> list[ExternalRequest]: + ... + + def mark_orphaned( + self, + request_id: str, + *, + error_details: JSONDict, + ) -> ExternalRequest: + ... + + # ---------- Images ---------- + def create_image( + self, + *, + epic_id: str, + source: str, + storage_url: str, + prompt: str | None = None, + quality: str | None = None, + size: str | None = None, + reference_key: str | None = None, + description: str | None = None, + caption: str | None = None, + in_body: bool = False, + active: bool = True, + discord_attachment_id: str | None = None, + ) -> Image: + ... + + def load_image(self, image_id: str) -> Image | None: + ... + + def list_images( + self, + *, + epic_id: str, + source: str | None = None, + active: bool | None = True, + ) -> list[Image]: + ... + + def update_image(self, image_id: str, **changes: Any) -> Image: + ... + + def list_active_images(self, epic_id: str) -> list[Image]: + ... + + def load_active_image_by_reference(self, epic_id: str, reference_key: str) -> Image | None: + ... + + def active_image_reference_exists(self, epic_id: str, reference_key: str) -> bool: + ... + + def deactivate_active_image_reference(self, epic_id: str, reference_key: str) -> list[Image]: + ... + + # ---------- Second opinions ---------- + def create_second_opinion( + self, + *, + epic_id: str, + requested_by: str, + focus_areas: Sequence[str], + raw_response: str, + score: int, + summary: str, + verdict: str, + model_used: str, + resulting_checklist_item_ids: Sequence[str] | None = None, + ) -> SecondOpinion: + ... + + def list_second_opinions(self, epic_id: str, *, limit: int | None = None) -> list[SecondOpinion]: + ... 
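The external-request ledger methods encode a write-ahead pattern: record intent with `insert_pending(...)` before the provider call, then `mark_confirmed(...)` or `mark_failed(...)` afterwards, so a crash between the two leaves a pending row a sweeper can reconcile. A minimal sketch, assuming the ledger rows expose `.id` like the other records and that the real provider call happens where the comment sits:

```python
from megaplan.store import FileStore

store = FileStore("/tmp/megaplan-demo")  # assumed scratch root
pending = store.insert_pending(
    idempotency_key="img-gen-demo-001",  # assumed caller-chosen key
    provider="openai",
    endpoint="/v1/images",
    request_summary={"op": "generate", "prompt_chars": 42},
)
try:
    # ... perform the real provider call here ...
    store.mark_confirmed(pending.id, provider_request_id="req_123")
except Exception as exc:
    store.mark_failed(pending.id, error_details={"error": str(exc)})

# A recovery sweep can later reconcile anything still pending:
for stale in store.find_pending_external_requests(older_than_seconds=300):
    store.mark_orphaned(stale.id, error_details={"reason": "no confirmation"})
```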
+ + def set_second_opinion_checklist_items( + self, + second_opinion_id: str, + checklist_item_ids: Sequence[str], + ) -> SecondOpinion: + ... + + # ---------- Codebases / artifacts ---------- + def create_codebase( + self, + *, + owner: str, + name: str, + default_branch: str, + scope: str = "global", + group_name: str | None = None, + associated_epic_id: str | None = None, + added_via: str = "manual", + verified_accessible_at: str | None = None, + notes: str | None = None, + codebase_id: str | None = None, + ) -> Codebase: + ... + + def upsert_codebase( + self, + *, + owner: str, + name: str, + default_branch: str, + scope: str = "global", + group_name: str | None = None, + associated_epic_id: str | None = None, + added_via: str = "manual", + verified_accessible_at: str | None = None, + notes: str | None = None, + ) -> Codebase: + ... + + def load_codebase(self, codebase_id: str) -> Codebase | None: + ... + + def find_codebase(self, owner: str, name: str) -> Codebase | None: + ... + + def list_codebases( + self, + *, + scope: str | None = None, + group_name: str | None = None, + epic_id: str | None = None, + include_global: bool = True, + ) -> list[Codebase]: + ... + + def update_codebase(self, codebase_id: str, **changes: Any) -> Codebase: + ... + + def remove_codebase(self, codebase_id: str) -> None: + ... + + def touch_codebase_accessed( + self, + codebase_id: str, + *, + accessed_at: str | None = None, + ) -> Codebase: + ... + + def mark_codebase_verified( + self, + codebase_id: str, + *, + verified_at: str | None = None, + default_branch: str | None = None, + ) -> Codebase: + ... + + def create_code_artifact( + self, + *, + kind: str, + source: str, + content: str, + codebase_id: str | None = None, + epic_id: str | None = None, + file_path: str | None = None, + line_range: Any = None, + scope: str | None = None, + content_summary: str | None = None, + metadata: JSONDict | None = None, + expires_at: str | None = None, + artifact_id: str | None = None, + ) -> CodeArtifact: + ... + + def load_code_artifact(self, artifact_id: str) -> CodeArtifact | None: + ... + + def list_code_artifacts( + self, + *, + codebase_id: str | None = None, + epic_id: str | None = None, + kind: str | None = None, + source: str | None = None, + file_path: str | None = None, + scope: str | None = None, + include_expired: bool = True, + limit: int | None = 50, + ) -> list[CodeArtifact]: + ... + + def update_code_artifact(self, artifact_id: str, **changes: Any) -> CodeArtifact: + ... + + def delete_code_artifact(self, artifact_id: str) -> None: + ... + + def touch_code_artifact_used( + self, + artifact_id: str, + *, + used_at: str | None = None, + ) -> CodeArtifact: + ... + + def get_api_cache( + self, + cache_key: str, + *, + now: str | None = None, + touch: bool = True, + ) -> CodeArtifact | None: + ... + + def upsert_api_cache( + self, + *, + cache_key: str, + content: str, + content_summary: str | None = None, + metadata: JSONDict | None = None, + codebase_id: str | None = None, + epic_id: str | None = None, + file_path: str | None = None, + scope: str | None = None, + expires_at: str | None = None, + ttl_seconds: int = 3600, + ) -> CodeArtifact: + ... + + def cleanup_expired_api_cache(self, *, now: str | None = None) -> int: + ... 
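The API-cache trio layers a keyed TTL cache over `CodeArtifact` rows: `upsert_api_cache(...)` writes under a cache key with an expiry, `get_api_cache(...)` reads it back (touching last-used unless `touch=False`), and `cleanup_expired_api_cache()` sweeps expired entries. A minimal sketch; the cache key scheme is invented and the artifact is assumed to expose `.content`:

```python
from megaplan.store import FileStore

store = FileStore("/tmp/megaplan-demo")  # assumed scratch root
store.upsert_api_cache(
    cache_key="github:owner/repo:branches",  # assumed key scheme
    content='["main", "develop"]',
    content_summary="branch list",
    ttl_seconds=900,  # shorter than the 3600s default
)
hit = store.get_api_cache("github:owner/repo:branches")
if hit is not None:
    print(hit.content)  # assumes CodeArtifact exposes the stored content
removed = store.cleanup_expired_api_cache()
print(f"expired cache entries removed: {removed}")
```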
+ + # ---------- Feedback ---------- + def create_feedback( + self, + *, + kind: str, + content: str, + source: str, + source_message_id: str | None = None, + epic_id: str | None = None, + turn_id: str | None = None, + context_snapshot: JSONDict | None = None, + ) -> Feedback: + ... + + def load_feedback(self, feedback_id: str) -> Feedback | None: + ... + + def update_feedback(self, feedback_id: str, **changes: Any) -> Feedback: + ... + + def list_feedback( + self, + *, + epic_id: str | None = None, + active: bool | None = None, + kinds: Sequence[str] | None = None, + limit: int | None = None, + ) -> list[Feedback]: + ... + + def list_observations( + self, + *, + resolved: bool | None = None, + limit: int | None = None, + ) -> list[Feedback]: + ... + + # ---------- Plan + PlanArtifact ---------- + def create_plan( + self, + *, + sprint_id: str | None, + epic_id: str | None, + name: str, + idea: str, + **fields: Any, + ) -> Plan: + ... + + def load_plan(self, plan_id: str) -> Plan | None: + ... + + def update_plan( + self, + plan_id: str, + *, + expected_revision: int | None = None, + **changes: Any, + ) -> Plan: + ... + + def list_plans( + self, + *, + sprint_id: str | None = None, + epic_id: str | None = None, + include_orphans: bool = False, + ) -> list[Plan]: + ... + + def read_plan_artifact(self, plan_id: str, name: str) -> bytes | None: + ... + + def write_plan_artifact( + self, + plan_id: str, + name: str, + data: bytes, + *, + expected_revision: int | None = None, + ) -> ArtifactRef: + ... + + def list_plan_artifacts(self, plan_id: str) -> list[ArtifactRef]: + ... + + def stat_plan_artifact(self, plan_id: str, name: str) -> ArtifactStat | None: + ... + + # ---------- Execution leases ---------- + def acquire_execution_lease( + self, + plan_id: str, + holder_id: str, + worker_kind: str, + ttl_seconds: int, + ) -> Lease: + ... + + def heartbeat_lease(self, plan_id: str, holder_id: str) -> Lease: + ... + + def release_lease(self, plan_id: str, holder_id: str) -> None: + ... + + def get_active_lease(self, plan_id: str) -> Lease | None: + ... + + # ---------- Locks ---------- + def acquire_lock(self, epic_id: str, holder_id: str, ttl_seconds: int) -> EpicLock: + ... + + def release_lock(self, epic_id: str, holder_id: str) -> None: + ... + + # ---------- Control plane ---------- + def put_control_message(self, msg: ControlMessageInput) -> ControlMessage: + ... + + def claim_pending_control_messages( + self, + *, + processor_id: str, + max: int = 10, + ) -> list[ControlMessage]: + ... + + def mark_control_message_processed(self, msg_id: str, result: JSONDict) -> None: + ... + + def append_progress_event(self, event: ProgressEventInput) -> ProgressEvent: + ... + + def list_progress_events( + self, + *, + plan_id: str | None = None, + epic_id: str | None = None, + since: datetime | None = None, + ) -> list[ProgressEvent]: + ... + + # ---------- Automation actors ---------- + def create_automation_actor( + self, + *, + actor_id: str, + name: str, + granted_epic_ids: str | Sequence[str], + actor_kind: str, + ) -> AutomationActor: + ... + + def load_automation_actor(self, actor_id: str) -> AutomationActor | None: + ... + + def update_automation_actor(self, actor_id: str, **changes: Any) -> AutomationActor: + ... 
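Taken together, the lease methods define a single-holder contract per plan: `acquire_execution_lease(...)` either grants the lease or raises `LeaseConflict`, heartbeats extend the TTL, and release is explicit. A minimal sketch of the expected worker loop; the plan id and holder id are invented:

```python
from megaplan.store import FileStore, LeaseConflict

store = FileStore("/tmp/megaplan-demo")  # assumed scratch root
try:
    store.acquire_execution_lease(
        "plan_abc123",  # assumed plan id
        holder_id="worker-1",
        worker_kind="executor",
        ttl_seconds=60,
    )
except LeaseConflict:
    raise SystemExit("another worker already holds this plan")

try:
    # ... do a slice of work, heartbeating before the TTL can lapse ...
    store.heartbeat_lease("plan_abc123", "worker-1")
finally:
    store.release_lease("plan_abc123", "worker-1")
```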
+
+
+__all__ = [
+    "ArtifactRef",
+    "ArtifactStat",
+    "Backend",
+    "ChecklistItemInput",
+    "ControlMessageInput",
+    "EpicSummary",
+    "HotContext",
+    "JSONDict",
+    "Lease",
+    "LeaseConflict",
+    "LockConflict",
+    "MessageSearchHit",
+    "ProgressEventInput",
+    "RevisionConflict",
+    "SprintItemInput",
+    "SprintWithItems",
+    "Store",
+    "StoreError",
+    "Transaction",
+]
diff --git a/megaplan/store/blob.py b/megaplan/store/blob.py
new file mode 100644
index 00000000..03eefcb0
--- /dev/null
+++ b/megaplan/store/blob.py
@@ -0,0 +1,162 @@
+"""Blob storage protocol and file-mode seam."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+import mimetypes
+from pathlib import Path
+from typing import Protocol, runtime_checkable
+
+from pydantic import Field
+
+from megaplan._core.io import commit_journal_transaction, journal_blob_promotion, prepare_journal_transaction
+from megaplan.schemas import StorageModel, utc_now
+
+
+class BlobMissingError(FileNotFoundError):
+    """Raised when a blob cannot be loaded from the backing store."""
+
+
+class BlobRef(StorageModel):
+    blob_id: str
+    content_type: str
+    size_bytes: int | None = None
+    storage_url: str | None = None
+
+
+class BlobStat(StorageModel):
+    blob_id: str
+    content_type: str
+    size_bytes: int
+    updated_at: datetime = Field(default_factory=utc_now)
+
+
+@runtime_checkable
+class BlobStore(Protocol):
+    """Backend-agnostic blob storage contract."""
+
+    def put(self, blob_id: str, content: bytes, *, content_type: str) -> BlobRef:
+        ...
+
+    def get(self, blob_id: str) -> bytes:
+        ...
+
+    def url(self, blob_id: str, *, signed: bool = False, ttl: int = 3600) -> str:
+        ...
+
+    def delete(self, blob_id: str) -> None:
+        ...
+
+    def stat(self, blob_id: str) -> BlobStat | None:
+        ...
+
+
+class LocalDirBlobStore:
+    """Filesystem blob store seam.
+
+    Blob contents live at ``<root>/<blob_id>/data.<extension>`` with metadata
+    in ``meta.json``. Writes use the shared journal blob-promotion helpers so
+    the file-mode semantics match FileStore's staging rules.
+ """ + + def __init__(self, root: str | Path) -> None: + self.root = Path(root).expanduser().resolve() + self.root.mkdir(parents=True, exist_ok=True) + + def _blob_dir(self, blob_id: str) -> Path: + return self.root / blob_id + + def _find_data_path(self, blob_id: str) -> Path | None: + blob_dir = self._blob_dir(blob_id) + if not blob_dir.exists(): + return None + candidates = [ + path + for path in sorted(blob_dir.glob("data.*")) + if path.is_file() and not path.name.endswith(".staging") + ] + return candidates[0] if candidates else None + + def _meta_path(self, blob_id: str) -> Path: + return self._blob_dir(blob_id) / "meta.json" + + def _extension_for_content_type(self, content_type: str) -> str: + guessed = mimetypes.guess_extension(content_type, strict=False) or ".bin" + return guessed.lstrip(".") + + def put(self, blob_id: str, content: bytes, *, content_type: str) -> BlobRef: + extension = self._extension_for_content_type(content_type) + blob_dir = self._blob_dir(blob_id) + metadata = { + "blob_id": blob_id, + "content_type": content_type, + "size_bytes": len(content), + "updated_at": utc_now().isoformat().replace("+00:00", "Z"), + } + tx_id = f"blob-{blob_id}" + prepare_journal_transaction( + self.root, + tx_id, + blobs=[journal_blob_promotion(blob_dir, content, extension=extension, metadata=metadata)], + ) + commit_journal_transaction(self.root, tx_id) + return BlobRef( + blob_id=blob_id, + content_type=content_type, + size_bytes=len(content), + storage_url=str(self._find_data_path(blob_id)), + ) + + def get(self, blob_id: str) -> bytes: + data_path = self._find_data_path(blob_id) + if data_path is None: + raise BlobMissingError(blob_id) + return data_path.read_bytes() + + def url(self, blob_id: str, *, signed: bool = False, ttl: int = 3600) -> str: + del signed, ttl + data_path = self._find_data_path(blob_id) + if data_path is None: + raise BlobMissingError(blob_id) + return str(data_path) + + def delete(self, blob_id: str) -> None: + blob_dir = self._blob_dir(blob_id) + if not blob_dir.exists(): + return + import shutil + + shutil.rmtree(blob_dir) + + def stat(self, blob_id: str) -> BlobStat | None: + meta_path = self._meta_path(blob_id) + data_path = self._find_data_path(blob_id) + if data_path is None: + return None + if meta_path.exists(): + import json + + meta = json.loads(meta_path.read_text(encoding="utf-8")) + updated_at = datetime.fromisoformat(meta["updated_at"].replace("Z", "+00:00")) + return BlobStat( + blob_id=blob_id, + content_type=meta["content_type"], + size_bytes=meta.get("size_bytes", data_path.stat().st_size), + updated_at=updated_at, + ) + guessed_type = mimetypes.guess_type(str(data_path))[0] or "application/octet-stream" + return BlobStat( + blob_id=blob_id, + content_type=guessed_type, + size_bytes=data_path.stat().st_size, + updated_at=datetime.fromtimestamp(data_path.stat().st_mtime, tz=UTC), + ) + + +__all__ = [ + "BlobMissingError", + "BlobRef", + "BlobStat", + "BlobStore", + "LocalDirBlobStore", +] diff --git a/megaplan/store/compat.py b/megaplan/store/compat.py new file mode 100644 index 00000000..5bdea2b2 --- /dev/null +++ b/megaplan/store/compat.py @@ -0,0 +1,381 @@ +"""Compatibility adapters for live Arnold callers.""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any, Sequence +from uuid import uuid4 + +from megaplan.schemas import StorageModel + +from .base import ChecklistItemInput, JSONDict, LockConflict, SprintItemInput, Store +from .blob import BlobRef as StoreBlobRef +from .blob import 
BlobStore + + +def _dump(value: Any) -> Any: + if isinstance(value, StorageModel): + return value.model_dump(mode="json") + if isinstance(value, datetime): + return value.isoformat().replace("+00:00", "Z") + if isinstance(value, list): + return [_dump(item) for item in value] + if isinstance(value, tuple): + return [_dump(item) for item in value] + if isinstance(value, dict): + return {key: _dump(item) for key, item in value.items()} + return value + + +class ArnoldStoreAdapter: + """Expose the live Arnold dict-based store API on top of the new Store seam.""" + + def __init__(self, store: Store) -> None: + self._store = store + + def __getattr__(self, name: str) -> Any: + return getattr(self._store, name) + + def _call(self, method_name: str, /, *args: Any, **kwargs: Any) -> Any: + method = getattr(self._store, method_name) + return _dump(method(*args, **kwargs)) + + def transaction(self) -> Any: + return self._store.transaction(epic_id=None) + + def create_message(self, **fields: Any) -> JSONDict: + return self._call("create_message", **fields) + + def load_message(self, message_id: str) -> JSONDict | None: + return self._call("load_message", message_id) + + def load_messages(self, message_ids: Sequence[str]) -> list[JSONDict]: + return self._call("load_messages", message_ids) + + def update_message(self, message_id: str, **changes: Any) -> JSONDict: + return self._call("update_message", message_id, **changes) + + def latest_outbound_message(self, *, epic_id: str | None = None) -> JSONDict | None: + return self._call("latest_outbound_message", epic_id=epic_id) + + def create_turn(self, **fields: Any) -> JSONDict: + return self._call("create_turn", **fields) + + def update_turn(self, turn_id: str, **changes: Any) -> JSONDict: + return self._call("update_turn", turn_id, **changes) + + def find_abandoned_turns(self, older_than_seconds: int) -> list[JSONDict]: + return self._call("find_abandoned_turns", older_than_seconds) + + def record_tool_call(self, **fields: Any) -> JSONDict: + return self._call("record_tool_call", **fields) + + def log_system_event(self, **fields: Any) -> JSONDict: + return self._call("log_system_event", **fields) + + def acquire_epic_lock( + self, + epic_id: str, + *, + holder_id: str, + timeout_seconds: int = 60, + ) -> bool: + try: + acquired = self._store.acquire_lock( + epic_id=epic_id, + holder_id=holder_id, + ttl_seconds=timeout_seconds, + ) + except LockConflict: + return False + return bool(acquired) + + def release_epic_lock(self, epic_id: str, *, holder_id: str) -> None: + self._store.release_lock(epic_id, holder_id) + + def load_hot_context(self, epic_id: str | None) -> JSONDict: + return self._call("load_hot_context", epic_id) + + def find_unprocessed_messages( + self, + epic_id: str, + started_at: str, + exclude_ids: Sequence[str], + ) -> list[JSONDict]: + return self._call( + "find_unprocessed_messages", + epic_id, + started_at, + exclude_ids, + ) + + def insert_pending(self, **fields: Any) -> JSONDict: + return self._call("insert_pending", **fields) + + def mark_confirmed(self, request_id: str, **changes: Any) -> JSONDict: + return self._call("mark_confirmed", request_id, **changes) + + def mark_failed(self, request_id: str, **changes: Any) -> JSONDict: + return self._call("mark_failed", request_id, **changes) + + def find_pending_external_requests(self, older_than_seconds: int) -> list[JSONDict]: + return self._call("find_pending_external_requests", older_than_seconds) + + def mark_orphaned(self, request_id: str, **changes: Any) -> JSONDict: + return 
self._call("mark_orphaned", request_id, **changes) + + def create_image(self, **fields: Any) -> JSONDict: + return self._call("create_image", **fields) + + def load_image(self, image_id: str) -> JSONDict | None: + return self._call("load_image", image_id) + + def list_images(self, **filters: Any) -> list[JSONDict]: + return self._call("list_images", **filters) + + def update_image(self, image_id: str, **changes: Any) -> JSONDict: + return self._call("update_image", image_id, **changes) + + def list_active_images(self, epic_id: str) -> list[JSONDict]: + return self._call("list_active_images", epic_id) + + def load_active_image_by_reference(self, epic_id: str, reference_key: str) -> JSONDict | None: + return self._call("load_active_image_by_reference", epic_id, reference_key) + + def active_image_reference_exists(self, epic_id: str, reference_key: str) -> bool: + return self._call("active_image_reference_exists", epic_id, reference_key) + + def deactivate_active_image_reference(self, epic_id: str, reference_key: str) -> list[JSONDict]: + return self._call("deactivate_active_image_reference", epic_id, reference_key) + + def create_second_opinion(self, **fields: Any) -> JSONDict: + return self._call("create_second_opinion", **fields) + + def list_second_opinions(self, epic_id: str, *, limit: int | None = None) -> list[JSONDict]: + return self._call("list_second_opinions", epic_id, limit=limit) + + def set_second_opinion_checklist_items( + self, + second_opinion_id: str, + checklist_item_ids: Sequence[str], + ) -> JSONDict: + return self._call( + "set_second_opinion_checklist_items", + second_opinion_id, + checklist_item_ids, + ) + + def create_codebase(self, **fields: Any) -> JSONDict: + return self._call("create_codebase", **fields) + + def upsert_codebase(self, **fields: Any) -> JSONDict: + return self._call("upsert_codebase", **fields) + + def load_codebase(self, codebase_id: str) -> JSONDict | None: + return self._call("load_codebase", codebase_id) + + def find_codebase(self, owner: str, name: str) -> JSONDict | None: + return self._call("find_codebase", owner, name) + + def list_codebases(self, **filters: Any) -> list[JSONDict]: + return self._call("list_codebases", **filters) + + def update_codebase(self, codebase_id: str, **changes: Any) -> JSONDict: + return self._call("update_codebase", codebase_id, **changes) + + def remove_codebase(self, codebase_id: str) -> None: + self._store.remove_codebase(codebase_id) + + def touch_codebase_accessed(self, codebase_id: str, **changes: Any) -> JSONDict: + return self._call("touch_codebase_accessed", codebase_id, **changes) + + def mark_codebase_verified(self, codebase_id: str, **changes: Any) -> JSONDict: + return self._call("mark_codebase_verified", codebase_id, **changes) + + def create_code_artifact(self, **fields: Any) -> JSONDict: + return self._call("create_code_artifact", **fields) + + def load_code_artifact(self, artifact_id: str) -> JSONDict | None: + return self._call("load_code_artifact", artifact_id) + + def list_code_artifacts(self, **filters: Any) -> list[JSONDict]: + return self._call("list_code_artifacts", **filters) + + def update_code_artifact(self, artifact_id: str, **changes: Any) -> JSONDict: + return self._call("update_code_artifact", artifact_id, **changes) + + def delete_code_artifact(self, artifact_id: str) -> None: + self._store.delete_code_artifact(artifact_id) + + def touch_code_artifact_used(self, artifact_id: str, **changes: Any) -> JSONDict: + return self._call("touch_code_artifact_used", artifact_id, **changes) + + 
def get_api_cache(self, cache_key: str, **filters: Any) -> JSONDict | None: + return self._call("get_api_cache", cache_key, **filters) + + def upsert_api_cache(self, **fields: Any) -> JSONDict: + return self._call("upsert_api_cache", **fields) + + def cleanup_expired_api_cache(self, **filters: Any) -> int: + return self._store.cleanup_expired_api_cache(**filters) + + def create_epic(self, **fields: Any) -> JSONDict: + return self._call("create_epic", **fields) + + def load_epic(self, epic_id: str) -> JSONDict | None: + return self._call("load_epic", epic_id) + + def list_epics(self, **filters: Any) -> list[JSONDict]: + return self._call("list_epics", **filters) + + def search_epics(self, **filters: Any) -> list[JSONDict]: + return self._call("search_epics", **filters) + + def search_messages(self, **filters: Any) -> list[JSONDict]: + return self._call("search_messages", **filters) + + def update_epic(self, epic_id: str, **changes: Any) -> JSONDict: + return self._call("update_epic", epic_id, **changes) + + def seed_checklist(self, epic_id: str, items: Sequence[str]) -> list[JSONDict]: + seeded = [ + ChecklistItemInput( + content=content, + status="open", + position=position, + source="default_seed", + ) + for position, content in enumerate(items, start=1) + ] + return self._call("add_checklist_items", epic_id, seeded) + + def list_checklist_items(self, epic_id: str, *, status: str | None = None) -> list[JSONDict]: + return self._call("list_checklist_items", epic_id, status=status) + + def update_checklist_item(self, item_id: str, **changes: Any) -> JSONDict: + return self._call("update_checklist_item", item_id, **changes) + + def add_checklist_items(self, epic_id: str, items: Sequence[JSONDict]) -> list[JSONDict]: + return self._call( + "add_checklist_items", + epic_id, + [ChecklistItemInput.model_validate(item) for item in items], + ) + + def delete_checklist_items(self, item_ids: Sequence[str]) -> None: + self._store.delete_checklist_items(item_ids) + + def replace_checklist(self, epic_id: str, items: Sequence[JSONDict]) -> list[JSONDict]: + return self._call( + "replace_checklist", + epic_id, + [ChecklistItemInput.model_validate(item) for item in items], + ) + + def record_epic_event(self, **fields: Any) -> JSONDict: + return self._call("record_epic_event", **fields) + + def list_epic_events(self, epic_id: str, **filters: Any) -> list[JSONDict]: + return self._call("list_epic_events", epic_id, **filters) + + def latest_transaction_id(self, epic_id: str) -> str | None: + return self._store.latest_transaction_id(epic_id) + + def events_by_transaction(self, transaction_id: str) -> list[JSONDict]: + return self._call("events_by_transaction", transaction_id) + + def list_recent_turns(self, **filters: Any) -> list[JSONDict]: + return self._call("list_recent_turns", **filters) + + def search_tool_calls_by(self, **filters: Any) -> list[JSONDict]: + return self._call("search_tool_calls_by", **filters) + + def create_feedback(self, **fields: Any) -> JSONDict: + return self._call("create_feedback", **fields) + + def load_feedback(self, feedback_id: str) -> JSONDict | None: + return self._call("load_feedback", feedback_id) + + def update_feedback(self, feedback_id: str, **changes: Any) -> JSONDict: + return self._call("update_feedback", feedback_id, **changes) + + def list_feedback(self, **filters: Any) -> list[JSONDict]: + return self._call("list_feedback", **filters) + + def list_observations(self, **filters: Any) -> list[JSONDict]: + return self._call("list_observations", **filters) + + def 
create_sprint(self, **fields: Any) -> JSONDict: + return self._call("create_sprint", **fields) + + def load_sprint(self, sprint_id: str) -> JSONDict | None: + return self._call("load_sprint", sprint_id) + + def list_sprints(self, epic_id: str) -> list[JSONDict]: + return self._call("list_sprints", epic_id) + + def update_sprint(self, sprint_id: str, **changes: Any) -> JSONDict: + return self._call("update_sprint", sprint_id, **changes) + + def delete_sprint(self, sprint_id: str) -> None: + self._store.delete_sprint(sprint_id) + + def replace_sprint_items(self, sprint_id: str, items: Sequence[JSONDict]) -> list[JSONDict]: + return self._call( + "replace_sprint_items", + sprint_id, + [SprintItemInput.model_validate(item) for item in items], + ) + + def list_sprint_items(self, sprint_id: str) -> list[JSONDict]: + return self._call("list_sprint_items", sprint_id) + + def list_sprints_with_items(self, epic_id: str) -> list[JSONDict]: + return self._call("list_sprints_with_items", epic_id) + + +class ArnoldBlobAdapter: + """Adapt the new blob seam to Arnold's legacy epic/key-oriented port.""" + + def __init__(self, blob_store: BlobStore) -> None: + self._blob_store = blob_store + + @staticmethod + def _blob_id(epic_id: str, idempotency_key: str | None) -> str: + suffix = idempotency_key or uuid4().hex + return f"{epic_id}/{suffix}" + + @staticmethod + def _ref(epic_id: str, store_ref: StoreBlobRef) -> JSONDict: + key = store_ref.blob_id.partition("/")[2] or store_ref.blob_id + return { + "epic_id": epic_id, + "key": key, + "mime_type": store_ref.content_type, + "size_bytes": store_ref.size_bytes, + } + + def put( + self, + epic_id: str, + content: bytes, + mime_type: str, + *, + idempotency_key: str | None = None, + ) -> JSONDict: + blob_id = self._blob_id(epic_id, idempotency_key) + return self._ref( + epic_id, + self._blob_store.put(blob_id, content, content_type=mime_type), + ) + + def get(self, ref: JSONDict) -> bytes: + blob_id = self._blob_id(str(ref["epic_id"]), str(ref["key"])) + return self._blob_store.get(blob_id) + + def exists(self, ref: JSONDict) -> bool: + blob_id = self._blob_id(str(ref["epic_id"]), str(ref["key"])) + return self._blob_store.stat(blob_id) is not None + + +__all__ = ["ArnoldBlobAdapter", "ArnoldStoreAdapter"] diff --git a/megaplan/store/db.py b/megaplan/store/db.py new file mode 100644 index 00000000..9cfcbb1f --- /dev/null +++ b/megaplan/store/db.py @@ -0,0 +1,33 @@ +"""Database-backed store skeleton for Sprint 1.""" + +from __future__ import annotations + +import inspect + +from .base import Store + + +def _not_implemented_method(name: str): + def _method(self, *args, **kwargs): # type: ignore[no-untyped-def] + raise NotImplementedError(f"DBStore.{name}() is implemented in Sprint 2") + + _method.__name__ = name + _method.__qualname__ = f"DBStore.{name}" + return _method + + +class DBStore: + """Protocol-complete DB store skeleton. + + Sprint 1 intentionally leaves every method unimplemented while keeping the + import and structural typing seam in place for Sprint 2. 
+ """ + + +for _name, _value in inspect.getmembers(Store, predicate=inspect.isfunction): + if _name.startswith("_"): + continue + setattr(DBStore, _name, _not_implemented_method(_name)) + + +__all__ = ["DBStore"] diff --git a/megaplan/store/file.py b/megaplan/store/file.py new file mode 100644 index 00000000..0dc803db --- /dev/null +++ b/megaplan/store/file.py @@ -0,0 +1,2069 @@ +"""File-backed Store implementation for Sprint 1.""" + +from __future__ import annotations + +from collections import defaultdict +from contextlib import AbstractContextManager +from datetime import UTC, datetime, timedelta +import json +import shutil +from pathlib import Path +from typing import Any, Iterable, Mapping, Sequence +from uuid import uuid4 + +from megaplan._core.io import ( + commit_journal_transaction, + fsync_dir, + journal_blob_promotion, + journal_bytes_write, + journal_event_log, + json_dump, + normalize_text, + now_utc, + prepare_journal_transaction, + read_committed_framed_json_records, + recover_journal, +) +from megaplan.schemas import ( + AutomationActor, + BotTurn, + ChecklistItem, + CodeArtifact, + Codebase, + ControlMessage, + Epic, + EpicEvent, + EpicLock, + ExecutionLease, + ExternalRequest, + Feedback, + Image, + Message, + Plan, + ProgressEvent, + SecondOpinion, + Sprint, + SprintItem, + SystemLog, + ToolCall, +) +from megaplan.schemas.base import utc_now + +from .base import ( + ArtifactRef, + ArtifactStat, + ChecklistItemInput, + ControlMessageInput, + EpicSummary, + HotContext, + LeaseConflict, + LockConflict, + MessageSearchHit, + ProgressEventInput, + RevisionConflict, + SprintItemInput, + SprintWithItems, + Store, + Transaction, +) +from .blob import LocalDirBlobStore + +_ACTIVE_EPIC_STATES = {"shaping", "sprinting", "planned", "paused"} +_TERMINAL_TURN_STATUSES = {"completed", "failed", "abandoned"} +_OBSERVATION_KINDS = {"friction", "ambiguity", "tool_failure", "confusion", "pattern_noticed"} +_SOURCE_REFERENCE_PREFIX = { + "user_uploaded": "img_user_upload", + "caller_uploaded": "img_caller_upload", + "agent_generated": "img_agent_generated", +} + + +def _new_id(prefix: str) -> str: + return f"{prefix}_{uuid4().hex[:12]}" + + +def _parse_datetime(value: datetime | str | None) -> datetime | None: + if value is None: + return None + if isinstance(value, datetime): + if value.tzinfo is None: + return value.replace(tzinfo=UTC) + return value.astimezone(UTC) + return datetime.fromisoformat(value.replace("Z", "+00:00")).astimezone(UTC) + + +def _utc_key(value: datetime | None) -> tuple[datetime, bool]: + if value is None: + return (datetime.min.replace(tzinfo=UTC), True) + return (value, False) + + +def _model_bytes(model: Any) -> bytes: + if hasattr(model, "model_dump"): + return json_dump(model.model_dump(mode="json")).encode("utf-8") + return json_dump(model).encode("utf-8") + + +class _FileStoreTransaction(AbstractContextManager["_FileStoreTransaction"]): + def __init__( + self, + store: "FileStore", + journal_root: Path, + *, + joined: bool = False, + parent: "_FileStoreTransaction | None" = None, + ) -> None: + self.store = store + self.journal_root = journal_root + self.tx_id = _new_id("tx") + self._joined = joined + self._parent = parent + self._writes: list[dict[str, Any]] = [] + self._blobs: list[dict[str, Any]] = [] + self._event_logs: dict[str, list[dict[str, Any]]] = defaultdict(list) + + def __enter__(self) -> _FileStoreTransaction: + if self._joined and self._parent is not None: + return self._parent + self.store._active_transaction = self + return self + + def 
__exit__(self, exc_type, exc, tb) -> bool | None: + if self._joined: + return False + self.store._active_transaction = None + if exc_type is not None: + return False + if not self._writes and not self._blobs and not self._event_logs: + return False + prepare_journal_transaction( + self.journal_root, + self.tx_id, + writes=self._writes, + event_logs=[ + journal_event_log(Path(path), records) + for path, records in self._event_logs.items() + ], + blobs=self._blobs, + ) + commit_journal_transaction(self.journal_root, self.tx_id) + return False + + def add_write(self, path: Path, data: bytes) -> None: + self._writes.append(journal_bytes_write(path, data, tx_id=self.tx_id)) + + def add_blob(self, blob_dir: Path, content: bytes, *, extension: str, metadata: Mapping[str, Any]) -> None: + self._blobs.append( + journal_blob_promotion( + blob_dir, + content, + extension=extension, + metadata=metadata, + ) + ) + + def add_event(self, path: Path, record: Mapping[str, Any]) -> None: + self._event_logs[str(path)].append(dict(record)) + + +class FileStore(Store): + """Filesystem-backed Store implementation. + + The implementation favors compatibility and correctness over cleverness: + records live as JSON files in a stable directory layout, and mutations flow + through the journal helpers added to ``megaplan._core.io`` in Sprint 1. + """ + + def __init__(self, root: str | Path) -> None: + self.root = Path(root).expanduser().resolve() + self.root.mkdir(parents=True, exist_ok=True) + self._active_transaction: _FileStoreTransaction | None = None + self.blobs = LocalDirBlobStore(self.root / "blobs") + self._recover_all_journals() + + # ------------------------------------------------------------------ + # Journal / transaction helpers + # ------------------------------------------------------------------ + + def _recover_all_journals(self) -> None: + recover_journal(self.root) + epics_root = self.root / "epics" + if epics_root.exists(): + for epic_dir in epics_root.iterdir(): + if epic_dir.is_dir(): + recover_journal(epic_dir) + + def _journal_root_for_epic(self, epic_id: str | None) -> Path: + return self._epic_dir(epic_id) if epic_id else self.root + + def _commit_write( + self, + path: Path, + data: bytes, + *, + journal_root: Path, + ) -> None: + transaction = self._active_transaction + if transaction is not None: + transaction.add_write(path, data) + return + tx_id = _new_id("tx") + prepare_journal_transaction( + journal_root, + tx_id, + writes=[journal_bytes_write(path, data, tx_id=tx_id)], + ) + commit_journal_transaction(journal_root, tx_id) + + def _commit_blob( + self, + blob_dir: Path, + content: bytes, + *, + extension: str, + metadata: Mapping[str, Any], + journal_root: Path, + ) -> None: + transaction = self._active_transaction + if transaction is not None: + transaction.add_blob(blob_dir, content, extension=extension, metadata=metadata) + return + tx_id = _new_id("tx") + prepare_journal_transaction( + journal_root, + tx_id, + blobs=[journal_blob_promotion(blob_dir, content, extension=extension, metadata=metadata)], + ) + commit_journal_transaction(journal_root, tx_id) + + def _commit_event(self, epic_id: str, record: Mapping[str, Any]) -> None: + events_path = self._events_path(epic_id) + transaction = self._active_transaction + if transaction is not None: + transaction.add_event(events_path, record) + return + tx_id = _new_id("tx") + prepare_journal_transaction( + self._journal_root_for_epic(epic_id), + tx_id, + event_logs=[journal_event_log(events_path, [record])], + ) + 
commit_journal_transaction(self._journal_root_for_epic(epic_id), tx_id) + + def transaction(self, epic_id: str | None = None) -> AbstractContextManager[Transaction]: + if self._active_transaction is not None: + return _FileStoreTransaction( + self, + self._active_transaction.journal_root, + joined=True, + parent=self._active_transaction, + ) + return _FileStoreTransaction(self, self._journal_root_for_epic(epic_id)) + + # ------------------------------------------------------------------ + # Path helpers + # ------------------------------------------------------------------ + + def _epic_dir(self, epic_id: str | None) -> Path: + if not epic_id: + return self.root + return self.root / "epics" / epic_id + + def _epic_path(self, epic_id: str) -> Path: + return self._epic_dir(epic_id) / "epic.json" + + def _body_path(self, epic_id: str) -> Path: + return self._epic_dir(epic_id) / "body.md" + + def _checklist_dir(self, epic_id: str) -> Path: + return self._epic_dir(epic_id) / "checklist" + + def _events_path(self, epic_id: str) -> Path: + return self._epic_dir(epic_id) / "events.jsonl" + + def _messages_dir(self) -> Path: + return self.root / "messages" + + def _turns_dir(self) -> Path: + return self.root / "turns" + + def _tool_calls_dir(self) -> Path: + return self.root / "tool_calls" + + def _system_logs_dir(self) -> Path: + return self.root / "system_logs" + + def _external_requests_dir(self) -> Path: + return self.root / "external_requests" + + def _images_dir(self) -> Path: + return self.root / "images" + + def _feedback_dir(self) -> Path: + return self.root / "feedback" + + def _second_opinions_dir(self) -> Path: + return self.root / "second_opinions" + + def _codebases_dir(self) -> Path: + return self.root / "codebases" + + def _code_artifacts_dir(self) -> Path: + return self.root / "code_artifacts" + + def _leases_dir(self) -> Path: + return self.root / "leases" + + def _locks_dir(self) -> Path: + return self.root / "locks" + + def _control_messages_dir(self) -> Path: + return self.root / "control_messages" + + def _progress_events_dir(self) -> Path: + return self.root / "progress_events" + + def _automation_actors_dir(self) -> Path: + return self.root / "automation_actors" + + def _message_path(self, message_id: str) -> Path: + return self._messages_dir() / f"{message_id}.json" + + def _turn_path(self, turn_id: str) -> Path: + return self._turns_dir() / f"{turn_id}.json" + + def _tool_call_path(self, tool_call_id: str) -> Path: + return self._tool_calls_dir() / f"{tool_call_id}.json" + + def _system_log_path(self, log_id: str) -> Path: + return self._system_logs_dir() / f"{log_id}.json" + + def _external_request_path(self, request_id: str) -> Path: + return self._external_requests_dir() / f"{request_id}.json" + + def _image_path(self, image_id: str) -> Path: + return self._images_dir() / f"{image_id}.json" + + def _feedback_path(self, feedback_id: str) -> Path: + return self._feedback_dir() / f"{feedback_id}.json" + + def _second_opinion_path(self, opinion_id: str) -> Path: + return self._second_opinions_dir() / f"{opinion_id}.json" + + def _codebase_path(self, codebase_id: str) -> Path: + return self._codebases_dir() / f"{codebase_id}.json" + + def _code_artifact_path(self, artifact_id: str) -> Path: + return self._code_artifacts_dir() / f"{artifact_id}.json" + + def _lease_path(self, plan_id: str) -> Path: + return self._leases_dir() / f"{plan_id}.json" + + def _lock_path(self, epic_id: str) -> Path: + return self._locks_dir() / f"{epic_id}.json" + + def _control_message_path(self, 
msg_id: str) -> Path: + return self._control_messages_dir() / f"{msg_id}.json" + + def _progress_event_path(self, event_id: str) -> Path: + return self._progress_events_dir() / f"{event_id}.json" + + def _automation_actor_path(self, actor_id: str) -> Path: + return self._automation_actors_dir() / f"{actor_id}.json" + + def _sprint_dir(self, epic_id: str, sprint_id: str) -> Path: + return self._epic_dir(epic_id) / "sprints" / sprint_id + + def _sprint_path(self, epic_id: str, sprint_id: str) -> Path: + return self._sprint_dir(epic_id, sprint_id) / "sprint.json" + + def _sprint_items_dir(self, epic_id: str, sprint_id: str) -> Path: + return self._sprint_dir(epic_id, sprint_id) / "items" + + def _checklist_path(self, epic_id: str, item_id: str) -> Path: + return self._checklist_dir(epic_id) / f"{item_id}.json" + + def _plan_dir(self, plan_id: str, *, epic_id: str | None, sprint_id: str | None) -> Path: + if epic_id is None: + return self.root / "orphan_plans" / plan_id + if sprint_id: + return self._sprint_dir(epic_id, sprint_id) / "plans" / plan_id + return self._epic_dir(epic_id) / "plans" / plan_id + + def _plan_path(self, plan_id: str, *, epic_id: str | None, sprint_id: str | None) -> Path: + return self._plan_dir(plan_id, epic_id=epic_id, sprint_id=sprint_id) / "plan.json" + + def _plan_artifacts_dir(self, plan_id: str) -> Path: + plan = self.load_plan(plan_id) + if plan is None: + raise FileNotFoundError(f"Unknown plan {plan_id}") + return self._plan_dir(plan_id, epic_id=plan.epic_id, sprint_id=plan.sprint_id) / "artifacts" + + def _find_path(self, pattern: str) -> Path | None: + for candidate in sorted(self.root.glob(pattern)): + if candidate.is_file(): + return candidate + return None + + def _find_checklist_path(self, item_id: str) -> Path | None: + return self._find_path(f"epics/*/checklist/{item_id}.json") + + def _find_sprint_path(self, sprint_id: str) -> Path | None: + return self._find_path(f"epics/*/sprints/{sprint_id}/sprint.json") + + def _find_plan_path(self, plan_id: str) -> Path | None: + patterns = [ + f"orphan_plans/{plan_id}/plan.json", + f"epics/*/plans/{plan_id}/plan.json", + f"epics/*/sprints/*/plans/{plan_id}/plan.json", + ] + for pattern in patterns: + path = self._find_path(pattern) + if path is not None: + return path + return None + + # ------------------------------------------------------------------ + # Generic read/write helpers + # ------------------------------------------------------------------ + + def _load_model(self, path: Path, model_cls: Any) -> Any | None: + if not path.exists(): + return None + data = json.loads(path.read_text(encoding="utf-8")) + return model_cls.model_validate(data) + + def _iter_models(self, directory: Path, model_cls: Any) -> list[Any]: + if not directory.exists(): + return [] + models = [] + for path in sorted(directory.glob("*.json")): + model = self._load_model(path, model_cls) + if model is not None: + models.append(model) + return models + + def _save_model(self, path: Path, model: Any, *, journal_root: Path) -> None: + self._commit_write(path, _model_bytes(model), journal_root=journal_root) + + def _delete_file(self, path: Path) -> None: + if not path.exists(): + return + path.unlink() + fsync_dir(path.parent) + + def _delete_tree(self, path: Path) -> None: + if not path.exists(): + return + shutil.rmtree(path) + fsync_dir(path.parent) + + def _require_expected_revision(self, current_revision: int, expected_revision: int | None) -> None: + if expected_revision is not None and current_revision != expected_revision: + 
raise RevisionConflict( + f"expected revision {expected_revision}, found {current_revision}", + ) + + def _terminal_turn(self, status: str) -> bool: + return status in _TERMINAL_TURN_STATUSES + + def _touch_updated_at(self, data: dict[str, Any], *, field_name: str = "updated_at") -> None: + if field_name in data: + data[field_name] = utc_now() + + def _update_model( + self, + path: Path, + model_cls: Any, + *, + expected_revision: int | None = None, + journal_root: Path, + **changes: Any, + ) -> Any: + current = self._load_model(path, model_cls) + if current is None: + raise FileNotFoundError(path) + data = current.model_dump() + if "revision" in data: + self._require_expected_revision(int(data["revision"]), expected_revision) + data["revision"] = int(data["revision"]) + 1 + data.update(changes) + if "updated_at" in data and "updated_at" not in changes: + data["updated_at"] = utc_now() + if "last_edited_at" in data and "last_edited_at" not in changes: + data["last_edited_at"] = utc_now() + if model_cls is BotTurn and self._terminal_turn(str(data.get("status"))) and not data.get("completed_at"): + data["completed_at"] = utc_now() + if model_cls is Feedback and data.get("resolved") and not data.get("resolved_at"): + data["resolved_at"] = utc_now() + updated = model_cls.model_validate(data) + self._save_model(path, updated, journal_root=journal_root) + return updated + + def _messages(self) -> list[Message]: + return self._iter_models(self._messages_dir(), Message) + + def _turns(self) -> list[BotTurn]: + return self._iter_models(self._turns_dir(), BotTurn) + + def _tool_calls(self) -> list[ToolCall]: + return self._iter_models(self._tool_calls_dir(), ToolCall) + + def _system_logs(self) -> list[SystemLog]: + return self._iter_models(self._system_logs_dir(), SystemLog) + + def _external_requests(self) -> list[ExternalRequest]: + return self._iter_models(self._external_requests_dir(), ExternalRequest) + + def _images(self) -> list[Image]: + return self._iter_models(self._images_dir(), Image) + + def _feedback_records(self) -> list[Feedback]: + return self._iter_models(self._feedback_dir(), Feedback) + + def _second_opinions(self) -> list[SecondOpinion]: + return self._iter_models(self._second_opinions_dir(), SecondOpinion) + + def _codebases(self) -> list[Codebase]: + return self._iter_models(self._codebases_dir(), Codebase) + + def _code_artifacts(self) -> list[CodeArtifact]: + return self._iter_models(self._code_artifacts_dir(), CodeArtifact) + + def _control_messages(self) -> list[ControlMessage]: + return self._iter_models(self._control_messages_dir(), ControlMessage) + + def _progress_events(self) -> list[ProgressEvent]: + return self._iter_models(self._progress_events_dir(), ProgressEvent) + + def _automation_actors(self) -> list[AutomationActor]: + return self._iter_models(self._automation_actors_dir(), AutomationActor) + + def _epics(self) -> list[Epic]: + epics_root = self.root / "epics" + if not epics_root.exists(): + return [] + epics: list[Epic] = [] + for path in sorted(epics_root.glob("*/epic.json")): + epic = self._load_model(path, Epic) + if epic is not None: + epics.append(epic) + return epics + + def _checklist_items(self, epic_id: str) -> list[ChecklistItem]: + items = self._iter_models(self._checklist_dir(epic_id), ChecklistItem) + return sorted(items, key=lambda item: (item.position, item.id)) + + def _sprints(self, epic_id: str) -> list[Sprint]: + sprints_root = self._epic_dir(epic_id) / "sprints" + if not sprints_root.exists(): + return [] + sprints: list[Sprint] = [] + 
for path in sorted(sprints_root.glob("*/sprint.json")): + sprint = self._load_model(path, Sprint) + if sprint is not None: + sprints.append(sprint) + return sorted(sprints, key=lambda sprint: (sprint.sprint_number, sprint.id)) + + def _plan_roots(self) -> list[Path]: + roots: list[Path] = [] + orphan_root = self.root / "orphan_plans" + if orphan_root.exists(): + roots.extend(path for path in sorted(orphan_root.iterdir()) if path.is_dir()) + epics_root = self.root / "epics" + if epics_root.exists(): + for epic_dir in sorted(path for path in epics_root.iterdir() if path.is_dir()): + direct_plans = epic_dir / "plans" + if direct_plans.exists(): + roots.extend(path for path in sorted(direct_plans.iterdir()) if path.is_dir()) + sprint_root = epic_dir / "sprints" + if sprint_root.exists(): + for sprint_dir in sorted(path for path in sprint_root.iterdir() if path.is_dir()): + plans_dir = sprint_dir / "plans" + if plans_dir.exists(): + roots.extend(path for path in sorted(plans_dir.iterdir()) if path.is_dir()) + return roots + + def _plans(self) -> list[Plan]: + plans: list[Plan] = [] + for plan_dir in self._plan_roots(): + plan = self._load_model(plan_dir / "plan.json", Plan) + if plan is not None: + plans.append(plan) + return sorted(plans, key=lambda plan: (plan.name, plan.id)) + + def _artifact_ref(self, plan_id: str, path: Path) -> ArtifactRef: + stat = path.stat() + return ArtifactRef( + plan_id=plan_id, + name=path.name, + size_bytes=stat.st_size, + sha256=self._sha256_bytes(path.read_bytes()), + updated_at=datetime.fromtimestamp(stat.st_mtime, tz=UTC), + ) + + def _sha256_bytes(self, content: bytes) -> str: + import hashlib + + return "sha256:" + hashlib.sha256(content).hexdigest() + + # ------------------------------------------------------------------ + # Epic + body + # ------------------------------------------------------------------ + + def create_epic( + self, + *, + title: str, + goal: str, + body: str, + state: str = "shaping", + home_backend: str = "file", + ) -> Epic: + epic_id = _new_id("epic") + epic = Epic( + id=epic_id, + title=title, + goal=goal, + body=body, + state=state, + home_backend=home_backend, + revision=0, + created_at=utc_now(), + last_edited_at=utc_now(), + ) + journal_root = self._journal_root_for_epic(epic_id) + with self.transaction(epic_id): + self._save_model(self._epic_path(epic_id), epic, journal_root=journal_root) + self._commit_write(self._body_path(epic_id), body.encode("utf-8"), journal_root=journal_root) + return epic + + def load_epic(self, epic_id: str) -> Epic | None: + return self._load_model(self._epic_path(epic_id), Epic) + + def update_epic(self, epic_id: str, *, expected_revision: int | None = None, **changes: Any) -> Epic: + current = self.load_epic(epic_id) + if current is None: + raise FileNotFoundError(epic_id) + self._require_expected_revision(current.revision, expected_revision) + data = current.model_dump() + data.update(changes) + data["revision"] = current.revision + 1 + data["last_edited_at"] = utc_now() + updated = Epic.model_validate(data) + journal_root = self._journal_root_for_epic(epic_id) + with self.transaction(epic_id): + self._save_model(self._epic_path(epic_id), updated, journal_root=journal_root) + if "body" in changes: + self._commit_write(self._body_path(epic_id), updated.body.encode("utf-8"), journal_root=journal_root) + return updated + + def list_epics( + self, + *, + active_only: bool = True, + limit: int = 50, + home_backend: str | None = None, + ) -> list[EpicSummary]: + epics = self._epics() + if active_only: + 
epics = [epic for epic in epics if epic.state in _ACTIVE_EPIC_STATES] + if home_backend is not None: + epics = [epic for epic in epics if epic.home_backend == home_backend] + summaries = [EpicSummary.model_validate(epic.model_dump()) for epic in epics[:limit]] + return summaries + + def search_epics(self, *, query: str, active_only: bool = True, limit: int = 20) -> list[EpicSummary]: + needle = normalize_text(query) + matches: list[tuple[int, Epic]] = [] + for epic in self._epics(): + if active_only and epic.state not in _ACTIVE_EPIC_STATES: + continue + haystack = normalize_text(f"{epic.title} {epic.goal} {epic.body}") + if needle in haystack: + matches.append((haystack.count(needle), epic)) + matches.sort(key=lambda item: (-item[0], item[1].id)) + return [ + EpicSummary.model_validate({**epic.model_dump(mode="json"), "rank": score}) + for score, epic in matches[:limit] + ] + + def load_body(self, epic_id: str) -> str: + body_path = self._body_path(epic_id) + if body_path.exists(): + return body_path.read_text(encoding="utf-8") + epic = self.load_epic(epic_id) + if epic is None: + raise FileNotFoundError(epic_id) + return epic.body + + def update_body(self, epic_id: str, body: str, *, expected_revision: int) -> Epic: + return self.update_epic(epic_id, expected_revision=expected_revision, body=body) + + # ------------------------------------------------------------------ + # Checklist + # ------------------------------------------------------------------ + + def seed_checklist(self, epic_id: str, items: Sequence[str]) -> list[ChecklistItem]: + seeded = [ + ChecklistItemInput( + content=content, + status="open", + position=index, + source="default_seed", + ) + for index, content in enumerate(items, start=1) + ] + return self.add_checklist_items(epic_id, seeded) + + def list_checklist_items(self, epic_id: str, *, status: str | None = None) -> list[ChecklistItem]: + items = self._checklist_items(epic_id) + if status is not None: + items = [item for item in items if item.status == status] + return items + + def add_checklist_items(self, epic_id: str, items: Sequence[ChecklistItemInput]) -> list[ChecklistItem]: + existing = self._checklist_items(epic_id) + next_position = max((item.position for item in existing), default=0) + 1 + created: list[ChecklistItem] = [] + journal_root = self._journal_root_for_epic(epic_id) + with self.transaction(epic_id): + for entry in items: + position = entry.position or next_position + next_position = max(next_position, position + 1) + item = ChecklistItem( + id=entry.id or _new_id("check"), + epic_id=epic_id, + content=entry.content, + status=entry.status, + position=position, + source=entry.source, + skip_reason=entry.skip_reason, + superseded_by_item_id=entry.superseded_by_item_id, + created_at=entry.created_at or utc_now(), + completed_at=entry.completed_at, + ) + self._save_model(self._checklist_path(epic_id, item.id), item, journal_root=journal_root) + created.append(item) + return sorted(created, key=lambda item: (item.position, item.id)) + + def update_checklist_item(self, item_id: str, **changes: Any) -> ChecklistItem: + path = self._find_checklist_path(item_id) + if path is None: + raise FileNotFoundError(item_id) + item = self._load_model(path, ChecklistItem) + assert item is not None + data = item.model_dump() + data.update(changes) + if data.get("status") == "done" and not data.get("completed_at"): + data["completed_at"] = utc_now() + updated = ChecklistItem.model_validate(data) + self._save_model(path, updated, 
journal_root=self._journal_root_for_epic(updated.epic_id)) + return updated + + def delete_checklist_items(self, item_ids: Sequence[str]) -> None: + for item_id in item_ids: + path = self._find_checklist_path(item_id) + if path is not None: + self._delete_file(path) + + def replace_checklist(self, epic_id: str, items: Sequence[ChecklistItemInput]) -> list[ChecklistItem]: + for existing in self._checklist_items(epic_id): + self._delete_file(self._checklist_path(epic_id, existing.id)) + return self.add_checklist_items(epic_id, items) + + # ------------------------------------------------------------------ + # Sprints + # ------------------------------------------------------------------ + + def create_sprint( + self, + *, + epic_id: str, + sprint_number: int, + name: str, + goal: str, + status: str = "proposed", + queue_position: int | None = None, + pending_reason: str | None = None, + target_weeks: int = 2, + ) -> Sprint: + sprint = Sprint( + id=_new_id("sprint"), + epic_id=epic_id, + sprint_number=sprint_number, + name=name, + goal=goal, + status=status, + revision=0, + queue_position=queue_position, + pending_reason=pending_reason, + target_weeks=target_weeks, + created_at=utc_now(), + updated_at=utc_now(), + queued_at=utc_now() if status == "queued" else None, + ) + self._save_model(self._sprint_path(epic_id, sprint.id), sprint, journal_root=self._journal_root_for_epic(epic_id)) + return sprint + + def load_sprint(self, sprint_id: str) -> Sprint | None: + path = self._find_sprint_path(sprint_id) + return self._load_model(path, Sprint) if path is not None else None + + def list_sprints(self, epic_id: str, *, status: str | None = None) -> list[Sprint]: + sprints = self._sprints(epic_id) + if status is not None: + sprints = [sprint for sprint in sprints if sprint.status == status] + return sprints + + def list_sprint_items(self, sprint_id: str) -> list[SprintItem]: + sprint = self.load_sprint(sprint_id) + if sprint is None: + return [] + items = self._iter_models(self._sprint_items_dir(sprint.epic_id, sprint.id), SprintItem) + return sorted(items, key=lambda item: (item.position, item.id)) + + def list_sprints_with_items(self, epic_id: str) -> list[SprintWithItems]: + result: list[SprintWithItems] = [] + for sprint in self.list_sprints(epic_id): + result.append( + SprintWithItems.model_validate( + { + **sprint.model_dump(mode="json"), + "items": [item.model_dump(mode="json") for item in self.list_sprint_items(sprint.id)], + } + ) + ) + return result + + def update_sprint(self, sprint_id: str, *, expected_revision: int | None = None, **changes: Any) -> Sprint: + path = self._find_sprint_path(sprint_id) + if path is None: + raise FileNotFoundError(sprint_id) + sprint = self._load_model(path, Sprint) + assert sprint is not None + self._require_expected_revision(sprint.revision, expected_revision) + data = sprint.model_dump() + data.update(changes) + # bump after applying changes so a stray "revision" key cannot clobber it (mirrors update_epic) + data["revision"] = sprint.revision + 1 + data["updated_at"] = utc_now() + if data.get("status") == "queued" and not data.get("queued_at"): + data["queued_at"] = utc_now() + updated = Sprint.model_validate(data) + self._save_model(path, updated, journal_root=self._journal_root_for_epic(updated.epic_id)) + return updated + + def delete_sprint(self, sprint_id: str) -> None: + path = self._find_sprint_path(sprint_id) + if path is None: + return + self._delete_tree(path.parent) + + def replace_sprint_items(self, sprint_id: str, items: Sequence[SprintItemInput]) -> list[SprintItem]: + sprint = self.load_sprint(sprint_id) + if sprint is None: + raise
FileNotFoundError(sprint_id) + items_dir = self._sprint_items_dir(sprint.epic_id, sprint.id) + self._delete_tree(items_dir) + created: list[SprintItem] = [] + next_position = 1 + for entry in items: + item = SprintItem( + id=entry.id or _new_id("sitem"), + sprint_id=sprint_id, + content=entry.content, + estimated_complexity=entry.estimated_complexity, + status=entry.status, + source_section=entry.source_section, + position=entry.position or next_position, + created_at=entry.created_at or utc_now(), + ) + next_position = item.position + 1 + self._save_model(items_dir / f"{item.id}.json", item, journal_root=self._journal_root_for_epic(sprint.epic_id)) + created.append(item) + return sorted(created, key=lambda item: (item.position, item.id)) + + def set_sprint_queue( + self, + epic_id: str, + ordered_sprint_ids: Sequence[str], + pending: Mapping[str, str], + ) -> list[Sprint]: + result: list[Sprint] = [] + with self.transaction(epic_id): + for sprint in self.list_sprints(epic_id): + data = sprint.model_dump() + if sprint.id in ordered_sprint_ids: + data["status"] = "queued" + data["queue_position"] = ordered_sprint_ids.index(sprint.id) + 1 + data["pending_reason"] = None + data["queued_at"] = utc_now() + elif sprint.id in pending: + data["status"] = "pending" + data["queue_position"] = None + data["pending_reason"] = pending[sprint.id] + else: + data["queue_position"] = None + data["pending_reason"] = None + if data["status"] in {"queued", "pending"}: + data["status"] = "proposed" + data["revision"] = sprint.revision + 1 + data["updated_at"] = utc_now() + updated = Sprint.model_validate(data) + self._save_model(self._sprint_path(epic_id, sprint.id), updated, journal_root=self._journal_root_for_epic(epic_id)) + result.append(updated) + return sorted(result, key=lambda sprint: (sprint.queue_position or 9999, sprint.sprint_number, sprint.id)) + + # ------------------------------------------------------------------ + # Events + # ------------------------------------------------------------------ + + def record_epic_event( + self, + *, + epic_id: str, + transaction_id: str, + event_type: str, + summary: str, + prior_state: dict[str, Any] | None, + turn_id: str | None, + ) -> EpicEvent: + event = EpicEvent( + id=_new_id("evt"), + epic_id=epic_id, + transaction_id=transaction_id, + event_type=event_type, + summary=summary, + prior_state=prior_state, + turn_id=turn_id, + occurred_at=utc_now(), + ) + self._commit_event(epic_id, event.model_dump(mode="json")) + return event + + def list_epic_events( + self, + epic_id: str, + *, + since: str | None = None, + until: str | None = None, + kinds: Sequence[str] | None = None, + limit: int | None = None, + ) -> list[EpicEvent]: + events_path = self._events_path(epic_id) + events = [ + EpicEvent.model_validate({key: value for key, value in record.items() if key != "tx_id"}) + for record in read_committed_framed_json_records(events_path) + ] + since_dt = _parse_datetime(since) + until_dt = _parse_datetime(until) + filtered: list[EpicEvent] = [] + for event in events: + if kinds and event.event_type not in kinds: + continue + if since_dt and event.occurred_at < since_dt: + continue + if until_dt and event.occurred_at > until_dt: + continue + filtered.append(event) + filtered.sort(key=lambda event: (event.occurred_at, event.id)) + if limit is not None: + return filtered[:limit] + return filtered + + def latest_transaction_id(self, epic_id: str) -> str | None: + events = self.list_epic_events(epic_id) + if not events: + return None + return 
events[-1].transaction_id + + def events_by_transaction(self, transaction_id: str) -> list[EpicEvent]: + results: list[EpicEvent] = [] + for epic in self._epics(): + results.extend( + event + for event in self.list_epic_events(epic.id) + if event.transaction_id == transaction_id + ) + results.sort(key=lambda event: (event.occurred_at, event.id)) + return results + + # ------------------------------------------------------------------ + # Messages / turns / tools / logs + # ------------------------------------------------------------------ + + def _next_invocation_message_id(self, turn_id: str) -> str: + count = sum(1 for row in self._messages() if row.bot_turn_id == turn_id and row.direction == "outbound") + return f"inv_{turn_id}_{count + 1}" + + def create_message( + self, + *, + epic_id: str | None, + direction: str, + content: str, + discord_message_id: str | None = None, + bot_turn_id: str | None = None, + has_code_attachment: bool = False, + has_image_attachment: bool = False, + in_burst_with: Sequence[str] | None = None, + was_voice_message: bool = False, + audio_storage_url: str | None = None, + transcription_metadata: dict[str, Any] | None = None, + synthesize_outbound_id: bool = True, + ) -> Message: + if synthesize_outbound_id and direction == "outbound" and discord_message_id is None and bot_turn_id: + discord_message_id = self._next_invocation_message_id(bot_turn_id) + message = Message( + id=_new_id("msg"), + epic_id=epic_id, + direction=direction, + content=content, + sent_at=utc_now(), + discord_message_id=discord_message_id, + has_code_attachment=has_code_attachment, + has_image_attachment=has_image_attachment, + in_burst_with=list(in_burst_with or []), + was_voice_message=was_voice_message, + audio_storage_url=audio_storage_url, + transcription_metadata=transcription_metadata, + bot_turn_id=bot_turn_id, + ) + self._save_model(self._message_path(message.id), message, journal_root=self.root) + return message + + def load_message(self, message_id: str) -> Message | None: + return self._load_model(self._message_path(message_id), Message) + + def load_messages(self, message_ids: Sequence[str]) -> list[Message]: + by_id = {message.id: message for message in self._messages()} + return [by_id[msg_id] for msg_id in message_ids if msg_id in by_id] + + def update_message(self, message_id: str, **changes: Any) -> Message: + return self._update_model( + self._message_path(message_id), + Message, + journal_root=self.root, + **changes, + ) + + def latest_outbound_message(self, *, epic_id: str | None = None) -> Message | None: + messages = [row for row in self._messages() if row.direction == "outbound"] + if epic_id is not None: + messages = [row for row in messages if row.epic_id == epic_id] + messages.sort(key=lambda row: (_utc_key(row.sent_at), row.id), reverse=True) + return messages[0] if messages else None + + def create_turn( + self, + *, + epic_id: str | None, + triggered_by_message_ids: Sequence[str], + prompt_snapshot: dict[str, Any] | None = None, + prompt_version: str | None = None, + state_at_turn: dict[str, Any] | None = None, + model_version: str | None = None, + ) -> BotTurn: + turn = BotTurn( + id=_new_id("turn"), + epic_id=epic_id, + triggered_by_message_ids=list(triggered_by_message_ids), + prompt_snapshot=prompt_snapshot, + prompt_version=prompt_version, + status="in_progress", + state_at_turn=state_at_turn, + model_version=model_version, + started_at=utc_now(), + ) + self._save_model(self._turn_path(turn.id), turn, journal_root=self.root) + return turn + + def 
update_turn(self, turn_id: str, **changes: Any) -> BotTurn: + return self._update_model(self._turn_path(turn_id), BotTurn, journal_root=self.root, **changes) + + def find_abandoned_turns(self, older_than_seconds: int) -> list[BotTurn]: + cutoff = datetime.now(UTC) - timedelta(seconds=older_than_seconds) + return sorted( + [ + turn + for turn in self._turns() + if turn.status == "in_progress" and turn.started_at <= cutoff + ], + key=lambda turn: (turn.started_at, turn.id), + ) + + def list_recent_turns(self, *, n: int = 10, epic_id: str | None = None) -> list[BotTurn]: + turns = self._turns() + if epic_id is not None: + turns = [turn for turn in turns if turn.epic_id == epic_id] + turns.sort(key=lambda turn: (_utc_key(turn.started_at), turn.id), reverse=True) + return turns[:n] + + def search_messages(self, *, query: str, epic_id: str | None = None, limit: int = 20) -> list[MessageSearchHit]: + needle = normalize_text(query) + hits: list[tuple[int, Message]] = [] + for message in self._messages(): + if epic_id is not None and message.epic_id != epic_id: + continue + content = normalize_text(message.content) + if needle in content: + hits.append((content.count(needle), message)) + hits.sort(key=lambda item: (-item[0], item[1].id)) + return [ + MessageSearchHit.model_validate({**msg.model_dump(mode="json"), "rank": score}) + for score, msg in hits[:limit] + ] + + def record_tool_call( + self, + *, + turn_id: str, + tool_name: str, + operation_kind: str, + arguments: dict[str, Any], + result: dict[str, Any], + duration_ms: int, + ) -> ToolCall: + tool_call = ToolCall( + id=_new_id("tool"), + turn_id=turn_id, + tool_name=tool_name, + operation_kind=operation_kind, + arguments=arguments, + result=result, + duration_ms=duration_ms, + called_at=utc_now(), + ) + self._save_model(self._tool_call_path(tool_call.id), tool_call, journal_root=self.root) + return tool_call + + def search_tool_calls_by( + self, + *, + tool_name: str | None = None, + epic_id: str | None = None, + since: str | None = None, + limit: int = 20, + ) -> list[ToolCall]: + since_dt = _parse_datetime(since) + turns_by_id = {turn.id: turn for turn in self._turns()} + matches: list[ToolCall] = [] + for row in self._tool_calls(): + if tool_name is not None and row.tool_name != tool_name: + continue + # look the turn up once; a missing turn never matches an epic filter + turn = turns_by_id.get(row.turn_id) + if epic_id is not None and (turn is None or turn.epic_id != epic_id): + continue + if since_dt and row.called_at < since_dt: + continue + matches.append(row) + matches.sort(key=lambda row: (_utc_key(row.called_at), row.id), reverse=True) + return matches[:limit] + + def log_system_event( + self, + *, + level: str, + category: str, + event_type: str, + message: str, + details: dict[str, Any] | None = None, + turn_id: str | None = None, + epic_id: str | None = None, + ) -> SystemLog: + log = SystemLog( + id=_new_id("log"), + level=level, + category=category, + event_type=event_type, + message=message, + details=details or {}, + turn_id=turn_id, + epic_id=epic_id, + occurred_at=utc_now(), + ) + self._save_model(self._system_log_path(log.id), log, journal_root=self.root) + return log + + def load_hot_context(self, epic_id: str | None) -> HotContext: + messages = [msg for msg in self._messages() if msg.epic_id == epic_id] + messages.sort(key=lambda msg: (_utc_key(msg.sent_at), msg.id), reverse=True) + tool_calls = self.search_tool_calls_by(epic_id=epic_id, limit=10) + feedback =
self.list_feedback(epic_id=epic_id, active=True, limit=20) + unresolved = [ + item + for item in self.list_observations(resolved=False, limit=20) + if item.epic_id == epic_id + ] + sprints = self.list_sprints_with_items(epic_id) if epic_id else [] + active_images = self.list_active_images(epic_id) if epic_id else [] + opinions = self.list_second_opinions(epic_id, limit=10) if epic_id else [] + all_pending = bool(sprints) and all(sprint.status == "pending" for sprint in sprints) + return HotContext( + epic=self.load_epic(epic_id) if epic_id else None, + recent_messages=messages[:10], + recent_tool_calls=tool_calls, + active_feedback=feedback, + unresolved_observations=unresolved, + sprints=sprints, + codebases=self.list_codebases(epic_id=epic_id), + recent_code_artifacts=self.list_code_artifacts(epic_id=epic_id, limit=10), + active_images=active_images, + recent_second_opinions=opinions, + all_sprints_pending_no_queued=all_pending and not any(sprint.status == "queued" for sprint in sprints), + ) + + def find_unprocessed_messages(self, epic_id: str, started_at: str, exclude_ids: Sequence[str]) -> list[Message]: + start_dt = _parse_datetime(started_at) + return sorted( + [ + msg + for msg in self._messages() + if msg.epic_id == epic_id + and msg.direction == "inbound" + and msg.bot_turn_id is None + and msg.id not in set(exclude_ids) + and (start_dt is None or msg.sent_at >= start_dt) + ], + key=lambda msg: (msg.sent_at, msg.id), + ) + + # ------------------------------------------------------------------ + # External requests + # ------------------------------------------------------------------ + + def insert_pending( + self, + *, + idempotency_key: str, + provider: str, + endpoint: str, + request_summary: dict[str, Any], + request_body: dict[str, Any] | None = None, + turn_id: str | None = None, + tool_call_id: str | None = None, + ) -> ExternalRequest: + if any(row.idempotency_key == idempotency_key for row in self._external_requests()): + raise ValueError(f"duplicate idempotency_key: {idempotency_key}") + request = ExternalRequest( + id=_new_id("req"), + idempotency_key=idempotency_key, + provider=provider, + endpoint=endpoint, + tool_call_id=tool_call_id, + turn_id=turn_id, + request_summary=request_summary, + request_body=request_body, + status="pending", + attempt_count=1, + first_attempted_at=utc_now(), + last_attempted_at=utc_now(), + ) + self._save_model(self._external_request_path(request.id), request, journal_root=self.root) + return request + + def _update_external_request(self, request_id: str, **changes: Any) -> ExternalRequest: + return self._update_model(self._external_request_path(request_id), ExternalRequest, journal_root=self.root, **changes) + + def mark_confirmed( + self, + request_id: str, + *, + provider_request_id: str | None = None, + provider_response_summary: dict[str, Any] | None = None, + ) -> ExternalRequest: + return self._update_external_request( + request_id, + status="confirmed", + provider_request_id=provider_request_id, + provider_response_summary=provider_response_summary, + completed_at=utc_now(), + last_attempted_at=utc_now(), + ) + + def mark_failed(self, request_id: str, *, error_details: dict[str, Any]) -> ExternalRequest: + return self._update_external_request( + request_id, + status="failed", + error_details=error_details, + completed_at=utc_now(), + last_attempted_at=utc_now(), + ) + + def find_pending_external_requests(self, older_than_seconds: int) -> list[ExternalRequest]: + cutoff = datetime.now(UTC) - timedelta(seconds=older_than_seconds) + 
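# recovery sketch (assumed caller loop): rows still "pending" past the cutoff are either retried or handed to mark_orphaned(...) below; each attempt is expected to bump last_attempted_at +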
return sorted( + [ + row + for row in self._external_requests() + if row.status == "pending" and row.last_attempted_at <= cutoff + ], + key=lambda row: (row.last_attempted_at, row.id), + ) + + def mark_orphaned(self, request_id: str, *, error_details: dict[str, Any]) -> ExternalRequest: + return self._update_external_request( + request_id, + status="orphaned", + error_details=error_details, + completed_at=utc_now(), + last_attempted_at=utc_now(), + ) + + # ------------------------------------------------------------------ + # Images + # ------------------------------------------------------------------ + + def _next_image_reference(self, source: str) -> str: + prefix = _SOURCE_REFERENCE_PREFIX.get(source, f"img_{source}") + count = sum(1 for row in self._images() if row.source == source) + return f"{prefix}_{count + 1}" + + def create_image( + self, + *, + epic_id: str, + source: str, + storage_url: str, + prompt: str | None = None, + quality: str | None = None, + size: str | None = None, + reference_key: str | None = None, + description: str | None = None, + caption: str | None = None, + in_body: bool = False, + active: bool = True, + discord_attachment_id: str | None = None, + ) -> Image: + ref = reference_key or self._next_image_reference(source) + if active: + self.deactivate_active_image_reference(epic_id, ref) + image = Image( + id=_new_id("img"), + epic_id=epic_id, + source=source, + prompt=prompt, + storage_url=storage_url, + quality=quality, + size=size, + created_at=utc_now(), + reference_key=ref, + description=description, + caption=caption, + in_body=in_body, + active=active, + discord_attachment_id=discord_attachment_id, + ) + self._save_model(self._image_path(image.id), image, journal_root=self.root) + return image + + def load_image(self, image_id: str) -> Image | None: + return self._load_model(self._image_path(image_id), Image) + + def list_images(self, *, epic_id: str, source: str | None = None, active: bool | None = True) -> list[Image]: + images = [row for row in self._images() if row.epic_id == epic_id] + if source is not None: + images = [row for row in images if row.source == source] + if active is not None: + images = [row for row in images if row.active == active] + images.sort(key=lambda row: (row.created_at, row.id)) + return images + + def update_image(self, image_id: str, **changes: Any) -> Image: + if changes.get("active") and changes.get("reference_key"): + # load once instead of twice when resolving the owning epic + current = self.load_image(image_id) + epic_id = changes.get("epic_id") or (current.epic_id if current else None) + if epic_id: + self.deactivate_active_image_reference(epic_id, changes["reference_key"]) + return self._update_model(self._image_path(image_id), Image, journal_root=self.root, **changes) + + def list_active_images(self, epic_id: str) -> list[Image]: + return self.list_images(epic_id=epic_id, active=True) + + def load_active_image_by_reference(self, epic_id: str, reference_key: str) -> Image | None: + for image in self.list_active_images(epic_id): + if image.reference_key == reference_key: + return image + return None + + def active_image_reference_exists(self, epic_id: str, reference_key: str) -> bool: + return self.load_active_image_by_reference(epic_id, reference_key) is not None + + def deactivate_active_image_reference(self, epic_id: str, reference_key: str) -> list[Image]: + updated: list[Image] = [] + for image in self.list_active_images(epic_id): + if image.reference_key != reference_key: + continue + updated.append(self.update_image(image.id, active=False)) + return updated + + #
------------------------------------------------------------------ + # Second opinions + # ------------------------------------------------------------------ + + def create_second_opinion( + self, + *, + epic_id: str, + requested_by: str, + focus_areas: Sequence[str], + raw_response: str, + score: int, + summary: str, + verdict: str, + model_used: str, + resulting_checklist_item_ids: Sequence[str] | None = None, + ) -> SecondOpinion: + opinion = SecondOpinion( + id=_new_id("opinion"), + epic_id=epic_id, + requested_at=utc_now(), + requested_by=requested_by, + focus_areas=list(focus_areas), + raw_response=raw_response, + score=score, + summary=summary, + verdict=verdict, + resulting_checklist_item_ids=list(resulting_checklist_item_ids or []), + model_used=model_used, + ) + self._save_model(self._second_opinion_path(opinion.id), opinion, journal_root=self.root) + return opinion + + def list_second_opinions(self, epic_id: str, *, limit: int | None = None) -> list[SecondOpinion]: + opinions = [row for row in self._second_opinions() if row.epic_id == epic_id] + opinions.sort(key=lambda row: (_utc_key(row.requested_at), row.id), reverse=True) + return opinions[:limit] if limit is not None else opinions + + def set_second_opinion_checklist_items( + self, + second_opinion_id: str, + checklist_item_ids: Sequence[str], + ) -> SecondOpinion: + return self._update_model( + self._second_opinion_path(second_opinion_id), + SecondOpinion, + journal_root=self.root, + resulting_checklist_item_ids=list(checklist_item_ids), + ) + + # ------------------------------------------------------------------ + # Codebases / code artifacts + # ------------------------------------------------------------------ + + def create_codebase( + self, + *, + owner: str, + name: str, + default_branch: str, + scope: str = "global", + group_name: str | None = None, + associated_epic_id: str | None = None, + added_via: str = "manual", + verified_accessible_at: str | None = None, + notes: str | None = None, + codebase_id: str | None = None, + ) -> Codebase: + codebase = Codebase( + id=codebase_id or _new_id("codebase"), + owner=owner.lower(), + name=name.lower(), + default_branch=default_branch, + scope=scope, + group_name=group_name, + associated_epic_id=associated_epic_id, + added_at=utc_now(), + added_via=added_via, + verified_accessible_at=_parse_datetime(verified_accessible_at), + notes=notes, + ) + self._save_model(self._codebase_path(codebase.id), codebase, journal_root=self.root) + return codebase + + def upsert_codebase(self, **fields: Any) -> Codebase: + existing = self.find_codebase(fields["owner"], fields["name"]) + if existing is None: + return self.create_codebase(**fields) + return self.update_codebase(existing.id, **fields) + + def load_codebase(self, codebase_id: str) -> Codebase | None: + return self._load_model(self._codebase_path(codebase_id), Codebase) + + def find_codebase(self, owner: str, name: str) -> Codebase | None: + owner_l = owner.lower() + name_l = name.lower() + for codebase in self._codebases(): + if codebase.owner == owner_l and codebase.name == name_l: + return codebase + return None + + def list_codebases( + self, + *, + scope: str | None = None, + group_name: str | None = None, + epic_id: str | None = None, + include_global: bool = True, + ) -> list[Codebase]: + codebases = self._codebases() + if scope is not None: + codebases = [row for row in codebases if row.scope == scope] + if group_name is not None: + codebases = [row for row in codebases if row.group_name == group_name] + if epic_id is not 
None: + codebases = [ + row + for row in codebases + if row.associated_epic_id == epic_id or (include_global and row.scope == "global") + ] + elif not include_global: + codebases = [row for row in codebases if row.scope != "global"] + codebases.sort(key=lambda row: (row.owner, row.name, row.id)) + return codebases + + def update_codebase(self, codebase_id: str, **changes: Any) -> Codebase: + if "owner" in changes: + changes["owner"] = changes["owner"].lower() + if "name" in changes: + changes["name"] = changes["name"].lower() + return self._update_model(self._codebase_path(codebase_id), Codebase, journal_root=self.root, **changes) + + def remove_codebase(self, codebase_id: str) -> None: + self._delete_file(self._codebase_path(codebase_id)) + + def touch_codebase_accessed(self, codebase_id: str, *, accessed_at: str | None = None) -> Codebase: + return self.update_codebase(codebase_id, last_accessed_at=_parse_datetime(accessed_at) or utc_now()) + + def mark_codebase_verified( + self, + codebase_id: str, + *, + verified_at: str | None = None, + default_branch: str | None = None, + ) -> Codebase: + changes: dict[str, Any] = {"verified_accessible_at": _parse_datetime(verified_at) or utc_now()} + if default_branch is not None: + changes["default_branch"] = default_branch + return self.update_codebase(codebase_id, **changes) + + def create_code_artifact( + self, + *, + kind: str, + source: str, + content: str, + codebase_id: str | None = None, + epic_id: str | None = None, + file_path: str | None = None, + line_range: Any = None, + scope: str | None = None, + content_summary: str | None = None, + metadata: dict[str, Any] | None = None, + expires_at: str | None = None, + artifact_id: str | None = None, + ) -> CodeArtifact: + artifact = CodeArtifact( + id=artifact_id or _new_id("artifact"), + codebase_id=codebase_id, + epic_id=epic_id, + kind=kind, + source=source, + file_path=file_path, + line_range=line_range, + scope=scope, + content=content, + content_summary=content_summary, + metadata=metadata or {}, + created_at=utc_now(), + expires_at=_parse_datetime(expires_at), + ) + self._save_model(self._code_artifact_path(artifact.id), artifact, journal_root=self.root) + return artifact + + def load_code_artifact(self, artifact_id: str) -> CodeArtifact | None: + return self._load_model(self._code_artifact_path(artifact_id), CodeArtifact) + + def list_code_artifacts( + self, + *, + codebase_id: str | None = None, + epic_id: str | None = None, + kind: str | None = None, + source: str | None = None, + file_path: str | None = None, + scope: str | None = None, + include_expired: bool = True, + limit: int | None = 50, + ) -> list[CodeArtifact]: + now_dt = datetime.now(UTC) + artifacts = self._code_artifacts() + filtered: list[CodeArtifact] = [] + for artifact in artifacts: + if codebase_id is not None and artifact.codebase_id != codebase_id: + continue + if epic_id is not None and artifact.epic_id != epic_id: + continue + if kind is not None and artifact.kind != kind: + continue + if source is not None and artifact.source != source: + continue + if file_path is not None and artifact.file_path != file_path: + continue + if scope is not None and artifact.scope != scope: + continue + if not include_expired and artifact.expires_at is not None and artifact.expires_at <= now_dt: + continue + filtered.append(artifact) + filtered.sort(key=lambda row: (_utc_key(row.created_at), row.id), reverse=True) + if limit is not None: + return filtered[:limit] + return filtered + + def update_code_artifact(self, artifact_id: 
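# partial update: fields absent from **changes are assumed to be preserved by _update_model +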
str, **changes: Any) -> CodeArtifact: + return self._update_model(self._code_artifact_path(artifact_id), CodeArtifact, journal_root=self.root, **changes) + + def delete_code_artifact(self, artifact_id: str) -> None: + self._delete_file(self._code_artifact_path(artifact_id)) + + def touch_code_artifact_used(self, artifact_id: str, *, used_at: str | None = None) -> CodeArtifact: + return self.update_code_artifact(artifact_id, last_used_at=_parse_datetime(used_at) or utc_now()) + + def get_api_cache(self, cache_key: str, *, now: str | None = None, touch: bool = True) -> CodeArtifact | None: + now_dt = _parse_datetime(now) or datetime.now(UTC) + for artifact in self._code_artifacts(): + if artifact.kind != "api_cache": + continue + if artifact.metadata.get("cache_key") != cache_key: + continue + if artifact.expires_at is not None and artifact.expires_at <= now_dt: + return None + if touch: + return self.touch_code_artifact_used(artifact.id) + return artifact + return None + + def upsert_api_cache( + self, + *, + cache_key: str, + content: str, + content_summary: str | None = None, + metadata: dict[str, Any] | None = None, + codebase_id: str | None = None, + epic_id: str | None = None, + file_path: str | None = None, + scope: str | None = None, + expires_at: str | None = None, + ttl_seconds: int = 3600, + ) -> CodeArtifact: + existing = self.get_api_cache(cache_key, touch=False) + expiry = _parse_datetime(expires_at) or (datetime.now(UTC) + timedelta(seconds=ttl_seconds)) + merged_metadata = dict(metadata or {}) + merged_metadata["cache_key"] = cache_key + if existing is None: + return self.create_code_artifact( + kind="api_cache", + source="conversation", + content=content, + codebase_id=codebase_id, + epic_id=epic_id, + file_path=file_path, + scope=scope, + content_summary=content_summary, + metadata=merged_metadata, + expires_at=expiry.isoformat().replace("+00:00", "Z"), + ) + return self.update_code_artifact( + existing.id, + content=content, + content_summary=content_summary, + metadata=merged_metadata, + codebase_id=codebase_id, + epic_id=epic_id, + file_path=file_path, + scope=scope, + expires_at=expiry, + ) + + def cleanup_expired_api_cache(self, *, now: str | None = None) -> int: + now_dt = _parse_datetime(now) or datetime.now(UTC) + expired = [ + artifact + for artifact in self._code_artifacts() + if artifact.kind == "api_cache" and artifact.expires_at is not None and artifact.expires_at <= now_dt + ] + for artifact in expired: + self.delete_code_artifact(artifact.id) + return len(expired) + + # ------------------------------------------------------------------ + # Feedback + # ------------------------------------------------------------------ + + def create_feedback( + self, + *, + kind: str, + content: str, + source: str, + source_message_id: str | None = None, + epic_id: str | None = None, + turn_id: str | None = None, + context_snapshot: dict[str, Any] | None = None, + ) -> Feedback: + feedback = Feedback( + id=_new_id("fb"), + kind=kind, + content=content, + source=source, + source_message_id=source_message_id, + epic_id=epic_id, + turn_id=turn_id, + context_snapshot=context_snapshot, + created_at=utc_now(), + ) + self._save_model(self._feedback_path(feedback.id), feedback, journal_root=self.root) + return feedback + + def load_feedback(self, feedback_id: str) -> Feedback | None: + return self._load_model(self._feedback_path(feedback_id), Feedback) + + def update_feedback(self, feedback_id: str, **changes: Any) -> Feedback: + return 
self._update_model(self._feedback_path(feedback_id), Feedback, journal_root=self.root, **changes) + + def list_feedback( + self, + *, + epic_id: str | None = None, + active: bool | None = None, + kinds: Sequence[str] | None = None, + limit: int | None = None, + ) -> list[Feedback]: + feedback = self._feedback_records() + if epic_id is not None: + feedback = [row for row in feedback if row.epic_id == epic_id] + if active is not None: + feedback = [row for row in feedback if row.active == active] + if kinds is not None: + allowed = set(kinds) + feedback = [row for row in feedback if row.kind in allowed] + feedback.sort(key=lambda row: (_utc_key(row.created_at), row.id), reverse=True) + return feedback[:limit] if limit is not None else feedback + + def list_observations(self, *, resolved: bool | None = None, limit: int | None = None) -> list[Feedback]: + feedback = [row for row in self._feedback_records() if row.kind in _OBSERVATION_KINDS] + if resolved is not None: + feedback = [row for row in feedback if row.resolved == resolved] + feedback.sort(key=lambda row: (_utc_key(row.created_at), row.id), reverse=True) + return feedback[:limit] if limit is not None else feedback + + # ------------------------------------------------------------------ + # Plans / artifacts + # ------------------------------------------------------------------ + + def create_plan( + self, + *, + sprint_id: str | None, + epic_id: str | None, + name: str, + idea: str, + **fields: Any, + ) -> Plan: + now_dt = utc_now() + plan_id = fields.pop("plan_id", name) + plan = Plan( + id=plan_id, + name=name, + epic_id=epic_id, + sprint_id=sprint_id, + revision=int(fields.pop("revision", 0)), + idea=idea, + current_state=fields.pop("current_state", "initialized"), + iteration=int(fields.pop("iteration", 1)), + config=dict(fields.pop("config", {})), + sessions=dict(fields.pop("sessions", {})), + plan_versions=list(fields.pop("plan_versions", [])), + history=list(fields.pop("history", [])), + meta=dict(fields.pop("meta", {})), + last_gate=dict(fields.pop("last_gate", {})), + active_step=fields.pop("active_step", None), + clarification=fields.pop("clarification", None), + latest_finalize=fields.pop("latest_finalize", None), + latest_review=fields.pop("latest_review", None), + latest_execution=fields.pop("latest_execution", None), + latest_failure=fields.pop("latest_failure", None), + artifacts=list(fields.pop("artifacts", [])), + created_at=_parse_datetime(fields.pop("created_at", now_dt)) or now_dt, + updated_at=_parse_datetime(fields.pop("updated_at", now_dt)) or now_dt, + ) + self._save_model( + self._plan_path(plan.id, epic_id=epic_id, sprint_id=sprint_id), + plan, + journal_root=self._journal_root_for_epic(epic_id), + ) + return plan + + def load_plan(self, plan_id: str) -> Plan | None: + path = self._find_plan_path(plan_id) + return self._load_model(path, Plan) if path is not None else None + + def update_plan(self, plan_id: str, *, expected_revision: int | None = None, **changes: Any) -> Plan: + current = self.load_plan(plan_id) + if current is None: + raise FileNotFoundError(plan_id) + self._require_expected_revision(current.revision, expected_revision) + data = current.model_dump() + data.update(changes) + data["revision"] = current.revision + 1 + data["updated_at"] = utc_now() + updated = Plan.model_validate(data) + self._save_model( + self._plan_path(plan_id, epic_id=updated.epic_id, sprint_id=updated.sprint_id), + updated, + journal_root=self._journal_root_for_epic(updated.epic_id), + ) + return updated + + def 
list_plans( + self, + *, + sprint_id: str | None = None, + epic_id: str | None = None, + include_orphans: bool = False, + ) -> list[Plan]: + plans = self._plans() + if sprint_id is not None: + plans = [plan for plan in plans if plan.sprint_id == sprint_id] + if epic_id is not None: + plans = [plan for plan in plans if plan.epic_id == epic_id] + elif not include_orphans: + plans = [plan for plan in plans if plan.epic_id is not None] + return plans + + def read_plan_artifact(self, plan_id: str, name: str) -> bytes | None: + path = self._plan_artifacts_dir(plan_id) / name + return path.read_bytes() if path.exists() else None + + def write_plan_artifact( + self, + plan_id: str, + name: str, + data: bytes, + *, + expected_revision: int | None = None, + ) -> ArtifactRef: + plan = self.load_plan(plan_id) + if plan is None: + raise FileNotFoundError(plan_id) + self._require_expected_revision(plan.revision, expected_revision) + artifact_path = self._plan_artifacts_dir(plan_id) / name + self._commit_write(artifact_path, data, journal_root=self._journal_root_for_epic(plan.epic_id)) + return self._artifact_ref(plan_id, artifact_path) + + def list_plan_artifacts(self, plan_id: str) -> list[ArtifactRef]: + artifact_dir = self._plan_artifacts_dir(plan_id) + if not artifact_dir.exists(): + return [] + refs = [self._artifact_ref(plan_id, path) for path in sorted(artifact_dir.iterdir()) if path.is_file()] + refs.sort(key=lambda ref: ref.name) + return refs + + def stat_plan_artifact(self, plan_id: str, name: str) -> ArtifactStat | None: + path = self._plan_artifacts_dir(plan_id) / name + if not path.exists(): + return None + stat = path.stat() + return ArtifactStat( + plan_id=plan_id, + name=name, + size_bytes=stat.st_size, + sha256=self._sha256_bytes(path.read_bytes()), + updated_at=datetime.fromtimestamp(stat.st_mtime, tz=UTC), + ) + + # ------------------------------------------------------------------ + # Leases / locks + # ------------------------------------------------------------------ + + def acquire_execution_lease( + self, + plan_id: str, + holder_id: str, + worker_kind: str, + ttl_seconds: int, + ) -> ExecutionLease: + current = self.get_active_lease(plan_id) + if current is not None and current.holder_id != holder_id: + raise LeaseConflict(plan_id) + plan = self.load_plan(plan_id) + lease = ExecutionLease( + plan_id=plan_id, + epic_id=plan.epic_id if plan else None, + holder_id=holder_id, + phase=plan.current_state if plan else "unknown", + worker_kind=worker_kind, + acquired_at=utc_now(), + heartbeat_at=utc_now(), + expires_at=datetime.now(UTC) + timedelta(seconds=ttl_seconds), + ) + self._save_model(self._lease_path(plan_id), lease, journal_root=self.root) + return lease + + def heartbeat_lease(self, plan_id: str, holder_id: str) -> ExecutionLease: + lease = self.get_active_lease(plan_id) + if lease is None or lease.holder_id != holder_id: + raise LeaseConflict(plan_id) + ttl_seconds = max(int((lease.expires_at - lease.heartbeat_at).total_seconds()), 60) + return self._update_model( + self._lease_path(plan_id), + ExecutionLease, + journal_root=self.root, + heartbeat_at=utc_now(), + expires_at=datetime.now(UTC) + timedelta(seconds=ttl_seconds), + ) + + def release_lease(self, plan_id: str, holder_id: str) -> None: + lease = self.get_active_lease(plan_id) + if lease is None or lease.holder_id != holder_id: + return + self._delete_file(self._lease_path(plan_id)) + + def get_active_lease(self, plan_id: str) -> ExecutionLease | None: + lease = self._load_model(self._lease_path(plan_id), 
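# lazy expiry (design note): an expired lease file is deleted on read instead of by a background sweeper +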
ExecutionLease) + if lease is None: + return None + if lease.expires_at <= datetime.now(UTC): + self._delete_file(self._lease_path(plan_id)) + return None + return lease + + def acquire_lock(self, epic_id: str, holder_id: str, ttl_seconds: int) -> EpicLock: + current = self._load_model(self._lock_path(epic_id), EpicLock) + if current is not None and current.expires_at > datetime.now(UTC) and current.holder_id != holder_id: + raise LockConflict(epic_id) + lock = EpicLock( + epic_id=epic_id, + holder_id=holder_id, + acquired_at=utc_now(), + expires_at=datetime.now(UTC) + timedelta(seconds=ttl_seconds), + ) + self._save_model(self._lock_path(epic_id), lock, journal_root=self.root) + return lock + + def release_lock(self, epic_id: str, holder_id: str) -> None: + current = self._load_model(self._lock_path(epic_id), EpicLock) + if current is None or current.holder_id != holder_id: + return + self._delete_file(self._lock_path(epic_id)) + + # ------------------------------------------------------------------ + # Control plane / progress + # ------------------------------------------------------------------ + + def put_control_message(self, msg: ControlMessageInput) -> ControlMessage: + control = ControlMessage( + id=_new_id("ctrl"), + epic_id=msg.epic_id, + actor_id=msg.actor_id, + intent=msg.intent, + target_id=msg.target_id, + payload=msg.payload, + idempotency_key=msg.idempotency_key, + created_at=utc_now(), + ) + self._save_model(self._control_message_path(control.id), control, journal_root=self.root) + return control + + def claim_pending_control_messages(self, *, processor_id: str, max: int = 10) -> list[ControlMessage]: + pending = [ + row + for row in self._control_messages() + if row.claimed_at is None and row.processed_at is None + ] + pending.sort(key=lambda row: (_utc_key(row.created_at), row.id)) + claimed: list[ControlMessage] = [] + for row in pending[:max]: + claimed.append( + self._update_model( + self._control_message_path(row.id), + ControlMessage, + journal_root=self.root, + processor_id=processor_id, + claimed_at=utc_now(), + ) + ) + return claimed + + def mark_control_message_processed(self, msg_id: str, result: dict[str, Any]) -> None: + self._update_model( + self._control_message_path(msg_id), + ControlMessage, + journal_root=self.root, + result=result, + processed_at=utc_now(), + ) + + def append_progress_event(self, event: ProgressEventInput) -> ProgressEvent: + progress = ProgressEvent( + id=_new_id("prog"), + epic_id=event.epic_id, + plan_id=event.plan_id, + sprint_id=event.sprint_id, + kind=event.kind, + summary=event.summary, + details=event.details, + occurred_at=utc_now(), + ) + self._save_model(self._progress_event_path(progress.id), progress, journal_root=self.root) + return progress + + def list_progress_events( + self, + *, + plan_id: str | None = None, + epic_id: str | None = None, + since: datetime | None = None, + ) -> list[ProgressEvent]: + events = self._progress_events() + if plan_id is not None: + events = [row for row in events if row.plan_id == plan_id] + if epic_id is not None: + events = [row for row in events if row.epic_id == epic_id] + if since is not None: + events = [row for row in events if row.occurred_at >= since] + events.sort(key=lambda row: (row.occurred_at, row.id)) + return events + + # ------------------------------------------------------------------ + # Automation actors + # ------------------------------------------------------------------ + + def create_automation_actor( + self, + *, + actor_id: str, + name: str, + granted_epic_ids: 
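# "*" is assumed to act as an all-epics wildcard, as exercised by the contract test below +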
str | Sequence[str], + actor_kind: str, + ) -> AutomationActor: + actor = AutomationActor( + id=actor_id, + name=name, + granted_epic_ids=granted_epic_ids, + actor_kind=actor_kind, + created_at=utc_now(), + ) + self._save_model(self._automation_actor_path(actor.id), actor, journal_root=self.root) + return actor + + def load_automation_actor(self, actor_id: str) -> AutomationActor | None: + return self._load_model(self._automation_actor_path(actor_id), AutomationActor) + + def update_automation_actor(self, actor_id: str, **changes: Any) -> AutomationActor: + if "last_active_at" not in changes: + changes["last_active_at"] = utc_now() + return self._update_model(self._automation_actor_path(actor_id), AutomationActor, journal_root=self.root, **changes) + + +__all__ = ["FileStore"] diff --git a/megaplan/store/plan_repository.py b/megaplan/store/plan_repository.py new file mode 100644 index 00000000..2c90b2ac --- /dev/null +++ b/megaplan/store/plan_repository.py @@ -0,0 +1,354 @@ +"""Plan-tree access seam for Sprint 1 file mode.""" + +from __future__ import annotations + +import hashlib +import re +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from megaplan._core.io import ( + atomic_write_bytes, + atomic_write_json, + atomic_write_text, + find_plan_dir, + now_utc, + plan_search_roots, + read_json, +) +from megaplan.schemas import Plan, PlanArtifact + +from .base import Store + +_EXECUTION_BATCH_RE = re.compile(r"execution_batch_(\d+)\.json$") +_VERSION_RE = re.compile(r"_v(\d+)(?:\.|_|$)") + + +class PlanRepository: + """File-mode repository for the existing megaplan tree layout. + + The repository intentionally operates on the current on-disk plan tree + instead of routing plan artifacts through ``Store``. This preserves the + byte-sensitive fixture and worker surface that still expects a real + filesystem directory. + """ + + def __init__( + self, + root: str | Path, + *, + store: Store | None = None, + home: str | Path | None = None, + ) -> None: + self.root = Path(root) + self.store = store + self.home = Path(home) if home is not None else None + self._plan_dir = self.root if self._looks_like_plan_dir(self.root) else None + + @classmethod + def from_plan_dir( + cls, + plan_dir: str | Path, + *, + store: Store | None = None, + home: str | Path | None = None, + ) -> PlanRepository: + return cls(plan_dir, store=store, home=home) + + @staticmethod + def _looks_like_plan_dir(path: Path) -> bool: + return (path / "state.json").exists() + + def _home_path(self) -> Path | None: + if self.home is None: + return None + return self.home.expanduser().resolve() + + def _require_plan_dir(self) -> Path: + if self._plan_dir is None: + raise RuntimeError("PlanRepository is not bound to a plan directory") + return self._plan_dir + + def _resolve_artifact_path(self, name: str | Path) -> Path: + relative = Path(name) + if relative.is_absolute() or any(part == ".." 
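# rejects absolute paths and ".." traversal so artifact reads and writes cannot escape the plan tree +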
for part in relative.parts): + raise ValueError(f"Artifact path must stay inside the plan tree: {name!r}") + return self.plan_dir / relative + + @property + def is_bound(self) -> bool: + return self._plan_dir is not None + + @property + def plan_dir(self) -> Path: + return self._require_plan_dir() + + @property + def plan_name(self) -> str: + return self.plan_dir.name + + @property + def working_dir(self) -> Path: + return self.plan_dir + + @property + def compatibility_lock_path(self) -> Path: + return self.plan_dir / ".plan.lock" + + def resolve_plan_dir(self, plan_name: str) -> Path: + if self._plan_dir is not None: + if self.plan_dir.name != plan_name: + raise FileNotFoundError(plan_name) + return self.plan_dir + plan_dir = find_plan_dir(self.root, plan_name, home=self._home_path()) + if plan_dir is None: + raise FileNotFoundError(plan_name) + return plan_dir + + def for_plan(self, plan_name: str) -> PlanRepository: + return type(self)(self.resolve_plan_dir(plan_name), store=self.store, home=self.home) + + def active_plan_dirs(self) -> list[Path]: + by_name: dict[str, Path] = {} + for candidate_root in plan_search_roots(self.root, home=self._home_path()): + if not candidate_root.exists(): + continue + for child in candidate_root.iterdir(): + if child.is_dir() and self._looks_like_plan_dir(child): + by_name.setdefault(child.name, child) + return [by_name[name] for name in sorted(by_name)] + + def exists(self) -> bool: + return self._require_plan_dir().exists() + + def list_artifact_paths(self) -> list[Path]: + plan_dir = self._require_plan_dir() + return sorted( + (path for path in plan_dir.rglob("*") if path.is_file()), + key=lambda path: path.relative_to(plan_dir).as_posix(), + ) + + def list_artifact_names(self) -> list[str]: + plan_dir = self._require_plan_dir() + return [path.relative_to(plan_dir).as_posix() for path in self.list_artifact_paths()] + + def artifact_path(self, name: str | Path) -> Path: + return self._resolve_artifact_path(name) + + def read_artifact_bytes(self, name: str | Path) -> bytes | None: + path = self._resolve_artifact_path(name) + return path.read_bytes() if path.exists() else None + + def read_artifact_text(self, name: str | Path) -> str | None: + data = self.read_artifact_bytes(name) + return data.decode("utf-8") if data is not None else None + + def read_artifact_json(self, name: str | Path) -> dict[str, Any] | list[Any] | None: + path = self._resolve_artifact_path(name) + return read_json(path) if path.exists() else None + + def write_artifact_bytes(self, name: str | Path, data: bytes) -> Path: + path = self._resolve_artifact_path(name) + atomic_write_bytes(path, data) + return path + + def write_artifact_text(self, name: str | Path, data: str) -> Path: + path = self._resolve_artifact_path(name) + atomic_write_text(path, data) + return path + + def write_artifact_json(self, name: str | Path, data: Any) -> Path: + path = self._resolve_artifact_path(name) + atomic_write_json(path, data) + return path + + def delete_artifact(self, name: str | Path) -> None: + path = self._resolve_artifact_path(name) + if path.exists(): + path.unlink() + + def load_state(self) -> dict[str, Any]: + return read_json(self.plan_dir / "state.json") + + def save_state(self, state: dict[str, Any]) -> None: + atomic_write_json(self.plan_dir / "state.json", state) + + def list_execution_batch_artifacts(self) -> list[Path]: + return sorted( + ( + path + for path in self.list_artifact_paths() + if _EXECUTION_BATCH_RE.fullmatch(path.name) + ), + # numeric batch order: execution_batch_10 must sort after execution_batch_9, + # otherwise latest_execution_batch_artifact() picks the wrong batch past 9 + key=lambda path: int(_EXECUTION_BATCH_RE.fullmatch(path.name).group(1)), + ) + + def
latest_execution_batch_artifact(self) -> Path | None: + batches = self.list_execution_batch_artifacts() + return batches[-1] if batches else None + + def latest_plan_markdown_artifact(self) -> Path | None: + state = self.load_state() + plan_versions = state.get("plan_versions") or [] + if not plan_versions: + return None + latest = plan_versions[-1] + if not isinstance(latest, dict): + return None + filename = latest.get("file") + if not isinstance(filename, str) or not filename: + return None + path = self.artifact_path(filename) + return path if path.exists() else None + + def _artifact_kind(self, path: Path) -> str: + if path.suffix == ".md": + return "markdown" + if path.suffix == ".json": + return "json" + if path.suffix == ".jsonl": + return "jsonl" + if path.suffix == ".lock": + return "lock" + if path.suffix == ".txt": + return "raw_text" + return "derived" + + def _artifact_role(self, path: Path) -> str | None: + name = path.name + if name == "state.json" or (name.startswith("plan_v") and name.endswith(".meta.json")): + return "plan_meta" + if name.startswith("plan_v") and name.endswith(".md"): + return "plan_version" + if name == "prep.json": + return "prep" + if name == "review.json": + return "review" + if name.startswith("review_v") and name.endswith("_raw.txt"): + return "raw_worker_output" + if name == "gate.json": + return "gate" + if name.startswith("gate_signals_v"): + return "gate_signals" + if name == "execution.json": + return "execution" + if name.startswith("execution_batch_"): + return "execution_batch" + if name == "execution_audit.json": + return "execution_audit" + if name == "execution_checkpoint.json": + return "execution_checkpoint" + if name == "execution_trace.jsonl": + return "execution_trace" + if name.startswith("execute_v") and name.endswith("_raw.txt"): + return "raw_worker_output" + if name == "finalize.json" or name == "finalize_snapshot.json": + return "finalize_snapshot" if name == "finalize_snapshot.json" else "finalize" + if name.startswith("critique"): + return "critique" + if name == "faults.json": + return "faults" + if name.startswith("step_receipt_"): + return "receipt" + if name == "final.md": + return "derived_final" + if name == "directors_notes.json": + return "directors_notes" + if name == "human_verifications.json": + return "human_verifications" + if name == "tiebreaker_decisions.json": + return "tiebreaker_decisions" + if name == "tiebreaker_payload.json": + return "tiebreaker_payload" + if name.endswith(".tmpl") or name.endswith(".template"): + return "template" + if name.startswith("research") or name.endswith(".research.json"): + return "research" + return None + + def _artifact_version(self, path: Path) -> int | None: + match = _VERSION_RE.search(path.name) + return int(match.group(1)) if match is not None else None + + def _artifact_batch(self, path: Path) -> int | None: + match = _EXECUTION_BATCH_RE.fullmatch(path.name) + return int(match.group(1)) if match is not None else None + + def _artifact_phase(self, path: Path) -> str | None: + name = path.name + if name == "state.json": + return "state" + if name == "final.md": + return "finalize" + if name.startswith("step_receipt_"): + phase = name[len("step_receipt_"):] + if "_v" in phase: + return phase.split("_v", 1)[0] + return phase.removesuffix(".json") + if name.startswith("execution_batch_"): + return "execute" + if "_v" in name: + return name.split("_v", 1)[0] + if "." 
in name: + return name.split(".", 1)[0] + return None + + def describe_artifact(self, name: str | Path) -> PlanArtifact: + path = self._resolve_artifact_path(name) + if not path.exists(): + raise FileNotFoundError(path) + role = self._artifact_role(path) + if role is None: + raise ValueError(f"Artifact has no typed PlanArtifact role: {path.name}") + data = path.read_bytes() + return PlanArtifact( + name=path.relative_to(self.plan_dir).as_posix(), + kind=self._artifact_kind(path), + role=role, + version=self._artifact_version(path), + batch=self._artifact_batch(path), + phase=self._artifact_phase(path), + sha256="sha256:" + hashlib.sha256(data).hexdigest(), + ) + + def list_artifacts(self) -> list[PlanArtifact]: + artifacts: list[PlanArtifact] = [] + for name in self.list_artifact_names(): + path = self._resolve_artifact_path(name) + if self._artifact_role(path) is None: + continue + artifacts.append(self.describe_artifact(name)) + return artifacts + + def load_plan(self) -> Plan: + state = self.load_state() + review = self.read_artifact_json("review.json") + finalize = self.read_artifact_json("finalize.json") + execution = self.read_artifact_json("execution.json") + latest_failure = None + history = state.get("history") or [] + if isinstance(history, list): + for entry in reversed(history): + if isinstance(entry, dict) and entry.get("result") == "failed": + latest_failure = dict(entry) + break + timestamps = [path.stat().st_mtime for path in self.list_artifact_paths()] + updated_at = ( + datetime.fromtimestamp(max(timestamps), tz=UTC) + if timestamps + else datetime.fromisoformat(now_utc().replace("Z", "+00:00")) + ) + return Plan.from_plan_state( + state, + plan_id=self.plan_name, + artifacts=self.list_artifacts(), + latest_finalize=finalize if isinstance(finalize, dict) else None, + latest_review=review if isinstance(review, dict) else None, + latest_execution=execution if isinstance(execution, dict) else None, + latest_failure=latest_failure, + updated_at=updated_at, + ) + + def save_plan(self, plan: Plan) -> None: + self.save_state(plan.to_plan_state()) diff --git a/megaplan/tests/__init__.py b/megaplan/tests/__init__.py new file mode 100644 index 00000000..6c17a24e --- /dev/null +++ b/megaplan/tests/__init__.py @@ -0,0 +1 @@ +"""Test helpers bundled with the megaplan package.""" diff --git a/megaplan/tests/store_contract.py b/megaplan/tests/store_contract.py new file mode 100644 index 00000000..ff9fac87 --- /dev/null +++ b/megaplan/tests/store_contract.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Callable + +from megaplan.store import ( + ArnoldStoreAdapter, + ChecklistItemInput, + ControlMessageInput, + ProgressEventInput, + SprintItemInput, + Store, +) + + +def run_store_contract(store_factory: Callable[[], Store]) -> None: + store = store_factory() + + epic = store.create_epic( + title="Editorial Title", + goal="Editorial Goal", + body="# Editorial Title\n\nEditorial Goal\n", + ) + assert store.load_epic(epic.id).title == "Editorial Title" + assert store.load_body(epic.id).startswith("# Editorial Title") + updated_epic = store.update_body(epic.id, "# Revised\n", expected_revision=epic.revision) + assert updated_epic.revision == epic.revision + 1 + assert store.search_epics(query="revised")[0].id == epic.id + + checklist = store.seed_checklist(epic.id, ["First item", "Second item"]) + assert [item.position for item in checklist] == [1, 2] + replaced = store.replace_checklist( + epic.id, + [ + 
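# replace_checklist is expected to drop the two seeded items before inserting this replacement +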
ChecklistItemInput(content="Replacement item", status="open", position=1, source="user_requested"), + ], + ) + assert [item.content for item in replaced] == ["Replacement item"] + assert store.update_checklist_item(replaced[0].id, status="done").completed_at is not None + + sprint = store.create_sprint(epic_id=epic.id, sprint_number=1, name="Sprint 1", goal="Ship it") + items = store.replace_sprint_items( + sprint.id, + [ + SprintItemInput(content="Investigate", estimated_complexity="small", status="open", position=1), + ], + ) + assert items[0].content == "Investigate" + queued = store.set_sprint_queue(epic.id, [sprint.id], {}) + assert queued[0].queue_position == 1 + assert store.list_sprints_with_items(epic.id)[0].items[0].id == items[0].id + + bootstrap_turn = store.create_turn(epic_id=None, triggered_by_message_ids=[], prompt_snapshot={"phase": "bootstrap"}) + assert bootstrap_turn.epic_id is None + bootstrap_message = store.create_message(epic_id=None, direction="inbound", content="bootstrap hello") + assert bootstrap_message.epic_id is None + + inbound = store.create_message( + epic_id=epic.id, + direction="inbound", + content="hello from user", + discord_message_id="discord_1", + has_code_attachment=True, + ) + turn = store.create_turn( + epic_id=epic.id, + triggered_by_message_ids=[inbound.id], + prompt_snapshot={"input": "hello from user"}, + state_at_turn={"state": "shaping"}, + model_version="fake", + ) + completed_turn = store.update_turn(turn.id, status="completed", reasoning="done") + assert completed_turn.completed_at is not None + outbound = store.create_message(epic_id=epic.id, direction="outbound", content="hi", bot_turn_id=turn.id) + assert outbound.discord_message_id == f"inv_{turn.id}_1" + assert [row.id for row in store.load_messages([outbound.id, inbound.id])] == [outbound.id, inbound.id] + assert store.latest_outbound_message(epic_id=epic.id).id == outbound.id + assert store.search_messages(query="hello", epic_id=epic.id)[0].id == inbound.id + assert store.find_unprocessed_messages(epic.id, inbound.sent_at.isoformat().replace("+00:00", "Z"), exclude_ids=[]) == [inbound] + + tool_call = store.record_tool_call( + turn_id=turn.id, + tool_name="send_message", + operation_kind="write", + arguments={"content": "hi"}, + result={"discord_message_id": outbound.discord_message_id}, + duration_ms=1, + ) + assert tool_call.arguments["content"] == "hi" + log = store.log_system_event( + level="info", + category="system", + event_type="contract", + message="ok", + details={"ok": True}, + turn_id=turn.id, + epic_id=epic.id, + ) + assert log.details["ok"] is True + hot = store.load_hot_context(epic.id) + assert hot.epic.id == epic.id + assert any(row.id == inbound.id for row in hot.recent_messages) + + first_event = store.record_epic_event( + epic_id=epic.id, + transaction_id="txn_shared", + event_type="body_edit", + summary="Body updated", + prior_state={"body": "before"}, + turn_id=turn.id, + ) + second_event = store.record_epic_event( + epic_id=epic.id, + transaction_id="txn_shared", + event_type="checklist_change", + summary="Checklist updated", + prior_state={"items": [item.model_dump(mode='json') for item in replaced]}, + turn_id=turn.id, + ) + latest_event = store.record_epic_event( + epic_id=epic.id, + transaction_id="txn_latest", + event_type="state_change", + summary="State updated", + prior_state={"state": "shaping"}, + turn_id=turn.id, + ) + assert {row.id for row in store.events_by_transaction("txn_shared")} == {first_event.id, second_event.id} + assert 
store.latest_transaction_id(epic.id) == "txn_latest" + assert store.list_epic_events(epic.id, kinds=["state_change"])[0].id == latest_event.id + + request = store.insert_pending( + idempotency_key="idem_1", + provider="discord", + endpoint="POST /channels/channel_1/messages", + request_summary={"content_preview": "hello"}, + request_body={"content": "hello"}, + turn_id=turn.id, + ) + assert store.find_pending_external_requests(0)[0].id == request.id + orphaned = store.mark_orphaned(request.id, error_details={"reason": "expired"}) + assert orphaned.status == "orphaned" + confirmed = store.mark_confirmed( + store.insert_pending( + idempotency_key="idem_2", + provider="discord", + endpoint="POST /channels/channel_1/messages", + request_summary={"content_preview": "second"}, + request_body={"content": "second"}, + ).id, + provider_request_id="discord-req", + provider_response_summary={"ok": True}, + ) + assert confirmed.status == "confirmed" + + user_image = store.create_image(epic_id=epic.id, source="user_uploaded", storage_url="images/a.png") + hero = store.create_image(epic_id=epic.id, source="agent_generated", storage_url="images/b.png", reference_key="hero") + assert user_image.reference_key == "img_user_upload_1" + assert store.load_active_image_by_reference(epic.id, "hero").id == hero.id + assert store.active_image_reference_exists(epic.id, "hero") is True + assert store.deactivate_active_image_reference(epic.id, "hero")[0].active is False + + second_opinion = store.create_second_opinion( + epic_id=epic.id, + requested_by="user", + focus_areas=["tone"], + raw_response="raw", + score=8, + summary="solid", + verdict="keep going", + model_used="mock", + ) + updated_opinion = store.set_second_opinion_checklist_items(second_opinion.id, [replaced[0].id]) + assert updated_opinion.resulting_checklist_item_ids == [replaced[0].id] + + codebase = store.create_codebase(owner="openai", name="megaplan", default_branch="main", group_name="backend") + assert store.find_codebase("openai", "megaplan").id == codebase.id + assert store.upsert_codebase(owner="openai", name="megaplan", default_branch="trunk").default_branch == "trunk" + artifact = store.create_code_artifact( + kind="excerpt", + source="codebase", + content="print('hi')", + codebase_id=codebase.id, + epic_id=epic.id, + file_path="app.py", + scope="file", + metadata={"cache_key": "ignore"}, + ) + assert store.touch_code_artifact_used(artifact.id).last_used_at is not None + cache = store.upsert_api_cache(cache_key="cache-1", content="cached", epic_id=epic.id) + assert store.get_api_cache("cache-1", touch=False).id == cache.id + assert store.cleanup_expired_api_cache() == 0 + + feedback = store.create_feedback( + kind="friction", + content="slow", + source="agent_observation", + epic_id=epic.id, + ) + assert store.list_observations(resolved=False)[0].id == feedback.id + assert store.update_feedback(feedback.id, resolved=True).resolved_at is not None + + orphan_plan = store.create_plan(sprint_id=None, epic_id=None, name="orphan-plan", idea="legacy") + epic_plan = store.create_plan(sprint_id=sprint.id, epic_id=epic.id, name="epic-plan", idea="scoped") + assert orphan_plan.epic_id is None + plans = store.list_plans(include_orphans=True) + assert {plan.id for plan in plans} >= {orphan_plan.id, epic_plan.id} + assert any(plan.id == orphan_plan.id for plan in store.list_plans(include_orphans=True) if plan.epic_id is None) + store.write_plan_artifact(orphan_plan.id, "state.json", b"{\"ok\": true}\n") + assert store.read_plan_artifact(orphan_plan.id, 
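+        # byte-for-byte round trip: read_plan_artifact must return exactly the
+        # payload that write_plan_artifact stored, with no re-encoding.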
"state.json") == b"{\"ok\": true}\n" + assert store.stat_plan_artifact(orphan_plan.id, "state.json").size_bytes == len(b"{\"ok\": true}\n") + assert store.list_plan_artifacts(orphan_plan.id)[0].name == "state.json" + + lease = store.acquire_execution_lease(orphan_plan.id, holder_id="worker-a", worker_kind="local_cli", ttl_seconds=120) + assert lease.plan_id == orphan_plan.id + assert store.heartbeat_lease(orphan_plan.id, "worker-a").holder_id == "worker-a" + assert store.get_active_lease(orphan_plan.id).holder_id == "worker-a" + store.release_lease(orphan_plan.id, "worker-a") + assert store.get_active_lease(orphan_plan.id) is None + + lock = store.acquire_lock(epic.id, "holder-a", 120) + assert lock.holder_id == "holder-a" + try: + store.acquire_lock(epic.id, "holder-b", 120) + except Exception: + pass + else: + raise AssertionError("expected lock conflict") + store.release_lock(epic.id, "holder-a") + + control = store.put_control_message( + ControlMessageInput( + epic_id=epic.id, + actor_id="actor-1", + intent="pause_plan", + target_id=orphan_plan.id, + payload={"reason": "wait"}, + idempotency_key="control-1", + ) + ) + claimed = store.claim_pending_control_messages(processor_id="proc-1") + assert claimed[0].id == control.id + store.mark_control_message_processed(control.id, {"ok": True}) + progress = store.append_progress_event( + ProgressEventInput( + epic_id=epic.id, + plan_id=orphan_plan.id, + kind="phase_start", + summary="started", + details={"phase": "execute"}, + ) + ) + assert store.list_progress_events(plan_id=orphan_plan.id)[0].id == progress.id + + actor = store.create_automation_actor( + actor_id="actor-1", + name="CLI", + granted_epic_ids="*", + actor_kind="cli", + ) + assert store.load_automation_actor(actor.id).name == "CLI" + assert store.update_automation_actor(actor.id, name="CLI v2").name == "CLI v2" + + +def run_arnold_adapter_contract(store_factory: Callable[[], Store]) -> None: + adapter = ArnoldStoreAdapter(store_factory()) + epic = adapter.create_epic(title="Title", goal="Goal", body="# Title\n") + inbound = adapter.create_message(epic_id=epic["id"], direction="inbound", content="hello") + turn = adapter.create_turn(epic_id=None, triggered_by_message_ids=[], prompt_snapshot={"phase": "bootstrap"}) + assert turn["epic_id"] is None + assert adapter.acquire_epic_lock(epic["id"], holder_id="holder-a") is True + assert adapter.acquire_epic_lock(epic["id"], holder_id="holder-b") is False + adapter.release_epic_lock(epic["id"], holder_id="holder-a") + assert adapter.load_message(inbound["id"])["content"] == "hello" + assert adapter.load_hot_context(epic["id"])["epic"]["id"] == epic["id"] diff --git a/pyproject.toml b/pyproject.toml index fce38862..22190f87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ ] dependencies = [ "PyYAML>=6.0", + "pydantic>=2.0", ] [project.optional-dependencies] @@ -34,7 +35,6 @@ agent = [ "pyyaml", "requests", "jinja2", - "pydantic>=2.0", "prompt_toolkit", "firecrawl-py", "parallel-web>=0.4.2", diff --git a/tests/conftest.py b/tests/conftest.py index aa1867ab..568ca9da 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,15 @@ from megaplan.workers import WorkerResult +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--backend", + action="store", + default=None, + help="Optional storage backend selector used by Sprint 1 backend tests.", + ) + + def read_json(path: Path) -> dict: return json.loads(path.read_text(encoding="utf-8")) diff --git a/tests/test_file_store.py 
b/tests/test_file_store.py new file mode 100644 index 00000000..0a5fdf64 --- /dev/null +++ b/tests/test_file_store.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +import inspect +from pathlib import Path + +import pytest + +from megaplan.store import DBStore, FileStore, LocalDirBlobStore, Store +from megaplan.tests.store_contract import run_arnold_adapter_contract, run_store_contract + + +def test_file_store_contract(tmp_path: Path) -> None: + run_store_contract(lambda: FileStore(tmp_path / "store")) + + +def test_file_store_arnold_adapter_contract(tmp_path: Path) -> None: + run_arnold_adapter_contract(lambda: FileStore(tmp_path / "store")) + + +def test_file_store_places_orphan_plans_under_orphan_root(tmp_path: Path) -> None: + store = FileStore(tmp_path / "store") + plan = store.create_plan(sprint_id=None, epic_id=None, name="legacy-plan", idea="legacy") + turn = store.create_turn(epic_id=None, triggered_by_message_ids=[]) + message = store.create_message(epic_id=None, direction="inbound", content="bootstrap") + + assert (tmp_path / "store" / "orphan_plans" / plan.id / "plan.json").exists() + assert (tmp_path / "store" / "turns" / f"{turn.id}.json").exists() + assert (tmp_path / "store" / "messages" / f"{message.id}.json").exists() + + +def test_local_dir_blob_store_round_trip(tmp_path: Path) -> None: + store = LocalDirBlobStore(tmp_path / "blobs") + + ref = store.put("blob-1", b"hello", content_type="text/plain") + + assert ref.blob_id == "blob-1" + assert store.get("blob-1") == b"hello" + assert store.stat("blob-1").size_bytes == 5 + assert store.url("blob-1").endswith("data.txt") + + store.delete("blob-1") + + assert store.stat("blob-1") is None + + +def test_db_store_methods_raise_not_implemented() -> None: + db = DBStore() + + for name, func in inspect.getmembers(Store, predicate=inspect.isfunction): + if name.startswith("_"): + continue + with pytest.raises(NotImplementedError): + getattr(db, name)() diff --git a/tests/test_io_journal.py b/tests/test_io_journal.py new file mode 100644 index 00000000..c98e4412 --- /dev/null +++ b/tests/test_io_journal.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import json +import os +import time +from pathlib import Path + +from megaplan._core.io import ( + append_framed_json_transaction, + commit_journal_transaction, + framed_json_record_bytes, + journal_blob_promotion, + journal_bytes_write, + journal_event_log, + journal_text_write, + prepare_journal_transaction, + read_committed_framed_json_records, + recover_journal, + scrub_stale_staging_files, + write_journal_commit_marker, +) + + +def test_framed_json_scanner_ignores_incomplete_tail(tmp_path: Path) -> None: + events_path = tmp_path / "events.jsonl" + append_framed_json_transaction( + events_path, + "tx-1", + [{"event_type": "status_changed", "summary": "committed"}], + ) + + with events_path.open("ab") as handle: + handle.write(framed_json_record_bytes({"tx_id": "tx-2", "event_type": "_tx_begin"})) + partial = framed_json_record_bytes({"tx_id": "tx-2", "event_type": "partial", "summary": "tail"}) + handle.write(partial[:-3]) + + assert read_committed_framed_json_records(events_path) == [ + {"tx_id": "tx-1", "event_type": "status_changed", "summary": "committed"}, + ] + + +def test_recover_journal_discards_uncommitted_prepare(tmp_path: Path) -> None: + root = tmp_path / "epic" + target = root / "state.json" + events_path = root / "events.jsonl" + + prepare_journal_transaction( + root, + "tx-discard", + writes=[journal_text_write(target, json.dumps({"status": 
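+            # no .commit marker is written anywhere in this test, so this payload
+            # sits in the uncommitted crash window and must never become visible.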
"pending"}) + "\n", tx_id="tx-discard")], + event_logs=[journal_event_log(events_path, [{"event_type": "pending", "summary": "ignored"}])], + ) + + result = recover_journal(root) + + assert result["discarded"] == ["tx-discard"] + assert result["replayed"] == [] + assert not target.exists() + assert read_committed_framed_json_records(events_path) == [] + + +def test_recover_journal_replays_committed_transaction(tmp_path: Path) -> None: + root = tmp_path / "epic" + target = root / "state.json" + events_path = root / "events.jsonl" + + prepare_journal_transaction( + root, + "tx-replay", + writes=[journal_text_write(target, json.dumps({"status": "done"}) + "\n", tx_id="tx-replay")], + event_logs=[journal_event_log(events_path, [{"event_type": "done", "summary": "replayed"}])], + ) + write_journal_commit_marker(root, "tx-replay") + + result = recover_journal(root) + + assert result["replayed"] == ["tx-replay"] + assert json.loads(target.read_text(encoding="utf-8")) == {"status": "done"} + assert read_committed_framed_json_records(events_path) == [ + {"tx_id": "tx-replay", "event_type": "done", "summary": "replayed"}, + ] + assert not any((root / "_journal").glob("*.prepare.json")) + + +def test_commit_journal_transaction_promotes_blob_and_metadata(tmp_path: Path) -> None: + root = tmp_path / "epic" + blob_dir = root / "blobs" / "blob-1" + payload = b"hello blob" + metadata = {"blob_id": "blob-1", "content_type": "text/plain"} + + prepare_journal_transaction( + root, + "tx-blob", + blobs=[journal_blob_promotion(blob_dir, payload, extension="txt", metadata=metadata)], + writes=[journal_bytes_write(root / "receipt.bin", b"ok", tx_id="tx-blob")], + ) + + commit_journal_transaction(root, "tx-blob") + + assert (blob_dir / "data.txt").read_bytes() == payload + assert json.loads((blob_dir / "meta.json").read_text(encoding="utf-8")) == metadata + assert not (blob_dir / "data.staging").exists() + assert (root / "receipt.bin").read_bytes() == b"ok" + + +def test_scrub_stale_staging_files_removes_old_entries_only(tmp_path: Path) -> None: + blobs_root = tmp_path / "blobs" + old_staging = blobs_root / "old" / "data.staging" + fresh_staging = blobs_root / "fresh" / "data.staging" + old_staging.parent.mkdir(parents=True) + fresh_staging.parent.mkdir(parents=True) + old_staging.write_bytes(b"old") + fresh_staging.write_bytes(b"fresh") + + stale_time = time.time() - 7200 + os.utime(old_staging, (stale_time, stale_time)) + + removed = scrub_stale_staging_files(blobs_root) + + assert removed == [old_staging] + assert not old_staging.exists() + assert fresh_staging.exists() diff --git a/tests/test_plan_repository.py b/tests/test_plan_repository.py new file mode 100644 index 00000000..f6ed7725 --- /dev/null +++ b/tests/test_plan_repository.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import json +import shutil +from pathlib import Path + +from megaplan.schemas import Plan +from megaplan.store import PlanRepository +from megaplan._core.io import orphan_plans_root + + +FIXTURE_ROOT = Path("arnold-source/.megaplan/plans") + + +def _copy_fixture(tmp_path: Path, name: str) -> Path: + source = FIXTURE_ROOT / name + target = tmp_path / name + shutil.copytree(source, target) + return target + + +def test_plan_repository_resolves_canonical_orphan_plan_and_lock_path( + tmp_path: Path, + monkeypatch, +) -> None: + home = tmp_path / "home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + + project = tmp_path / "project" + project.mkdir() + (project / ".git").mkdir() + + plan_dir = 
orphan_plans_root(project) / "canonical-plan" + plan_dir.mkdir(parents=True) + (plan_dir / "state.json").write_text( + json.dumps( + { + "name": "canonical-plan", + "idea": "Keep existing behavior", + "current_state": "initialized", + "iteration": 1, + "created_at": "2026-05-03T00:00:00Z", + "config": {}, + "sessions": {}, + "plan_versions": [], + "history": [], + "meta": {}, + "last_gate": {}, + } + ), + encoding="utf-8", + ) + + repo = PlanRepository(project).for_plan("canonical-plan") + + assert repo.plan_dir == plan_dir + assert repo.working_dir == plan_dir + assert repo.compatibility_lock_path == plan_dir / ".plan.lock" + + +def test_plan_repository_round_trips_fixture_bytes_without_layout_changes(tmp_path: Path) -> None: + plan_dir = _copy_fixture(tmp_path, "sprint-6-images-second-opinion") + repo = PlanRepository.from_plan_dir(plan_dir) + + before = {name: repo.read_artifact_bytes(name) for name in repo.list_artifact_names()} + + for name, payload in before.items(): + assert payload is not None + repo.write_artifact_bytes(name, payload) + + after = {name: repo.read_artifact_bytes(name) for name in repo.list_artifact_names()} + + assert after == before + assert repo.list_artifact_names() == sorted(before) + + +def test_plan_repository_preserves_lexicographic_execution_batch_order(tmp_path: Path) -> None: + plan_dir = _copy_fixture(tmp_path, "sprint-1b-discord-resident") + repo = PlanRepository.from_plan_dir(plan_dir) + + batch_names = [path.name for path in repo.list_execution_batch_artifacts()] + + assert batch_names[:6] == [ + "execution_batch_1.json", + "execution_batch_10.json", + "execution_batch_11.json", + "execution_batch_12.json", + "execution_batch_13.json", + "execution_batch_14.json", + ] + assert batch_names[-1] == "execution_batch_9.json" + assert repo.latest_execution_batch_artifact() == plan_dir / "execution_batch_9.json" + + +def test_plan_repository_load_plan_exposes_hot_state_and_artifact_manifest(tmp_path: Path) -> None: + plan_dir = _copy_fixture(tmp_path, "sprint-1b-discord-resident") + repo = PlanRepository.from_plan_dir(plan_dir) + + plan = repo.load_plan() + + assert isinstance(plan, Plan) + assert plan.id == "sprint-1b-discord-resident" + assert plan.name == "sprint-1b-discord-resident" + assert plan.latest_review is not None + assert plan.latest_execution is not None + assert repo.compatibility_lock_path.exists() + assert any(artifact.name == "execution_batch_10.json" and artifact.batch == 10 for artifact in plan.artifacts) diff --git a/tests/test_storage_models.py b/tests/test_storage_models.py new file mode 100644 index 00000000..8473ceaf --- /dev/null +++ b/tests/test_storage_models.py @@ -0,0 +1,294 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +import pytest + +from megaplan.schemas import ( + AutomationActor, + BotTurn, + ChecklistItem, + CodeArtifact, + Codebase, + ControlMessage, + Epic, + EpicEvent, + EpicLock, + ExecutionLease, + ExternalRequest, + Feedback, + Image, + Message, + MigrationRun, + Plan, + PlanArtifact, + ProgressEvent, + SecondOpinion, + Sprint, + SprintItem, + SystemLog, + ToolCall, +) +from tests.conftest import load_state + + +NOW = datetime(2026, 5, 3, tzinfo=timezone.utc) + + +@pytest.mark.parametrize( + ("model_cls", "payload"), + [ + (Epic, {"id": "epic_1", "title": "Epic", "goal": "Ship it", "body": "Body", "state": "shaping"}), + (BotTurn, {"id": "turn_1", "status": "in_progress"}), + (Message, {"id": "msg_1", "direction": "inbound", "content": "hello"}), + (ToolCall, {"id": "tool_1", 
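+            # turn_id stays in this minimal payload; the schema appears to treat
+            # parent-turn linkage as required for tool calls.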
"turn_id": "turn_1", "tool_name": "read_file", "operation_kind": "read"}), + (SystemLog, {"id": "log_1", "level": "info", "category": "system", "event_type": "boot", "message": "ok"}), + (EpicLock, {"epic_id": "epic_1", "holder_id": "worker", "expires_at": NOW}), + ( + ExternalRequest, + { + "id": "req_1", + "idempotency_key": "idem_1", + "provider": "openai", + "endpoint": "/v1/responses", + "status": "pending", + }, + ), + ( + Image, + { + "id": "img_1", + "source": "agent_generated", + "storage_url": "https://example.invalid/image.png", + "reference_key": "hero", + }, + ), + (ChecklistItem, {"id": "item_1", "epic_id": "epic_1", "content": "Do work", "position": 1}), + (EpicEvent, {"id": "event_1", "epic_id": "epic_1", "transaction_id": "tx_1", "summary": "Created"}), + ( + Feedback, + { + "id": "feedback_1", + "kind": "style", + "content": "Prefer concise replies", + "source": "explicit_save_request", + }, + ), + ( + Sprint, + { + "id": "sprint_1", + "epic_id": "epic_1", + "sprint_number": 1, + "name": "Sprint 1", + "goal": "Deliver foundation", + "status": "done", + }, + ), + ( + SprintItem, + { + "id": "sprint_item_1", + "sprint_id": "sprint_1", + "content": "Implement model layer", + "estimated_complexity": "medium", + "status": "open", + "position": 1, + }, + ), + ( + SecondOpinion, + { + "id": "opinion_1", + "epic_id": "epic_1", + "requested_by": "user", + "raw_response": "Looks good", + "score": 8, + "summary": "Strong plan", + "verdict": "go", + "model_used": "gpt-5.5", + }, + ), + ( + Codebase, + { + "id": "codebase_1", + "owner": "openai", + "name": "megaplan", + "default_branch": "main", + }, + ), + ( + CodeArtifact, + { + "id": "artifact_1", + "kind": "summary", + "source": "codebase", + "content": "Important details", + }, + ), + ( + MigrationRun, + { + "id": "migration_1", + "epic_id": "epic_1", + "source_backend": "file", + "target_backend": "db", + "phase": "planning", + "holder_id": "worker", + "expires_at": NOW, + }, + ), + ( + ExecutionLease, + { + "plan_id": "plan_1", + "holder_id": "worker", + "phase": "execute", + "worker_kind": "local_cli", + "expires_at": NOW, + }, + ), + ( + PlanArtifact, + { + "name": "plan_v1.md", + "kind": "markdown", + "role": "plan_version", + "sha256": "deadbeef", + "created_at": NOW, + "updated_at": NOW, + }, + ), + ( + ControlMessage, + { + "id": "control_1", + "epic_id": "epic_1", + "actor_id": "actor_1", + "intent": "run_sprint", + "target_id": "sprint_1", + "idempotency_key": "msg_1", + }, + ), + ( + ProgressEvent, + { + "id": "progress_1", + "epic_id": "epic_1", + "kind": "phase_start", + "summary": "Execution started", + }, + ), + ( + AutomationActor, + { + "id": "actor_1", + "name": "Local CLI", + "granted_epic_ids": "*", + "actor_kind": "cli", + }, + ), + ( + Plan, + { + "id": "plan_1", + "name": "plan_1", + "revision": 0, + "idea": "test idea", + "current_state": "planned", + "iteration": 1, + "config": {"project_dir": "/tmp/project"}, + "sessions": {}, + "plan_versions": [], + "history": [], + "meta": {}, + "last_gate": {}, + "created_at": NOW, + "updated_at": NOW, + }, + ), + ], +) +def test_storage_models_construct_with_minimal_valid_payloads(model_cls, payload) -> None: + model = model_cls.model_validate(payload) + + assert model.model_dump() + + +def test_storage_models_normalize_json_defaults_and_extensions() -> None: + request = ExternalRequest.model_validate( + { + "id": "req_1", + "idempotency_key": "idem_1", + "provider": "openai", + "endpoint": "/v1/responses", + "status": "pending", + "request_summary": None, + 
"request_body": {"model": "gpt-5.5"}, + } + ) + opinion = SecondOpinion.model_validate( + { + "id": "opinion_1", + "epic_id": "epic_1", + "requested_by": "auto_state_gate", + "focus_areas": None, + "raw_response": "Need more detail", + "score": 4, + "summary": "Needs work", + "verdict": "revise", + "resulting_checklist_item_ids": None, + "model_used": "gpt-5.5", + } + ) + sprint = Sprint.model_validate( + { + "id": "sprint_1", + "epic_id": "epic_1", + "sprint_number": 1, + "name": "Sprint 1", + "goal": "Deliver foundation", + "status": "running", + } + ) + + assert request.request_summary == {} + assert request.request_body == {"model": "gpt-5.5"} + assert opinion.focus_areas == [] + assert opinion.resulting_checklist_item_ids == [] + assert sprint.status == "running" + + +def test_plan_round_trips_current_plan_state_shape(plan_fixture) -> None: + state = load_state(plan_fixture.plan_dir) + + plan = Plan.from_plan_state(state, plan_id="plan_1", revision=3) + + assert plan.name == state["name"] + assert plan.revision == 3 + assert plan.to_plan_state() == state + + +def test_feedback_and_sprint_constraints_match_design_extensions() -> None: + with pytest.raises(ValueError): + Feedback.model_validate( + { + "id": "feedback_1", + "kind": "style", + "content": "Too vague", + "source": "agent_observation", + } + ) + + with pytest.raises(ValueError): + Sprint.model_validate( + { + "id": "sprint_1", + "epic_id": "epic_1", + "sprint_number": 1, + "name": "Sprint 1", + "goal": "Deliver foundation", + "status": "queued", + } + ) diff --git a/tests/test_storage_roots.py b/tests/test_storage_roots.py new file mode 100644 index 00000000..58b5fbe9 --- /dev/null +++ b/tests/test_storage_roots.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from megaplan import auto +from megaplan._core import active_plan_dirs, resolve_plan_dir +from megaplan._core.io import canonical_megaplan_root, find_plan_dir, orphan_plans_root, repo_storage_id + + +def _write_state(plan_dir: Path, name: str) -> None: + plan_dir.mkdir(parents=True, exist_ok=True) + (plan_dir / "state.json").write_text( + json.dumps({"name": name, "current_state": "initialized"}), + encoding="utf-8", + ) + + +def test_repo_storage_id_is_stable_across_git_worktrees(tmp_path: Path) -> None: + main_repo = tmp_path / "repo-main" + main_repo.mkdir() + (main_repo / ".git").mkdir() + + worktree = tmp_path / "repo-worktree" + worktree.mkdir() + worktree_gitdir = main_repo / ".git" / "worktrees" / "feature-a" + worktree_gitdir.mkdir(parents=True) + (worktree / ".git").write_text(f"gitdir: {worktree_gitdir}\n", encoding="utf-8") + + assert repo_storage_id(main_repo) == repo_storage_id(worktree) + + +def test_canonical_orphan_plans_are_resolved_from_child_paths( + tmp_path: Path, + monkeypatch, +) -> None: + home = tmp_path / "home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + + project = tmp_path / "project" + project.mkdir() + (project / ".git").mkdir() + + plan_dir = orphan_plans_root(project) / "canonical-plan" + _write_state(plan_dir, "canonical-plan") + + child = project / "src" / "nested" + child.mkdir(parents=True) + + assert find_plan_dir(child, "canonical-plan") == plan_dir + assert auto._resolve_plan_dir("canonical-plan", child) == plan_dir + assert resolve_plan_dir(project, "canonical-plan") == plan_dir + + +def test_legacy_plan_resolution_stays_in_place_without_eager_copy( + tmp_path: Path, + monkeypatch, +) -> None: + home = tmp_path / "home" + home.mkdir() + monkeypatch.setenv("HOME", 
str(home)) + + project = tmp_path / "legacy-project" + project.mkdir() + (project / ".git").mkdir() + + legacy_plan_dir = project / ".megaplan" / "plans" / "legacy-plan" + _write_state(legacy_plan_dir, "legacy-plan") + + resolved = resolve_plan_dir(project, "legacy-plan") + + assert resolved == legacy_plan_dir + assert active_plan_dirs(project) == [legacy_plan_dir] + assert not canonical_megaplan_root(project).exists() diff --git a/tests/test_store_contract_adapter.py b/tests/test_store_contract_adapter.py new file mode 100644 index 00000000..35d1792b --- /dev/null +++ b/tests/test_store_contract_adapter.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +from contextlib import contextmanager +from datetime import timedelta +import importlib.util +from pathlib import Path +import sys + +from megaplan.schemas import BotTurn, ChecklistItem, Epic, EpicLock, Message, SystemLog, ToolCall, utc_now +from megaplan.store import ArnoldStoreAdapter, BlobStore, HotContext, LockConflict, Store + + +def _public_methods(cls: type[object]) -> set[str]: + return { + name + for name, value in cls.__dict__.items() + if callable(value) and not name.startswith("_") + } + + +def _load_arnold_ports() -> object: + path = Path(__file__).resolve().parents[1] / "arnold-source" / "agent_kit" / "ports.py" + spec = importlib.util.spec_from_file_location("_arnold_ports", path) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +class _FakeStore: + def __init__(self) -> None: + self.transaction_epic_ids: list[str | None] = [] + self.messages: dict[str, Message] = {} + + @contextmanager + def transaction(self, epic_id: str | None = None): + self.transaction_epic_ids.append(epic_id) + yield self + + def create_message(self, **fields: object) -> Message: + message = Message( + id="msg_1", + epic_id=fields.get("epic_id"), + direction=str(fields["direction"]), + content=str(fields["content"]), + bot_turn_id=fields.get("bot_turn_id"), + discord_message_id=fields.get("discord_message_id"), + ) + self.messages[message.id] = message + return message + + def load_message(self, message_id: str) -> Message | None: + return self.messages.get(message_id) + + def create_turn(self, **fields: object) -> BotTurn: + return BotTurn( + id="turn_1", + epic_id=fields.get("epic_id"), + triggered_by_message_ids=list(fields.get("triggered_by_message_ids", [])), + status="in_progress", + prompt_snapshot=fields.get("prompt_snapshot"), + ) + + def add_checklist_items(self, epic_id: str, items: object) -> list[ChecklistItem]: + return [ + ChecklistItem( + id=f"check_{index}", + epic_id=epic_id, + content=item.content, + status=item.status, + position=item.position or index, + source=item.source, + ) + for index, item in enumerate(items, start=1) + ] + + def update_epic(self, epic_id: str, **changes: object) -> Epic: + return Epic( + id=epic_id, + title=str(changes.get("title") or "Updated"), + goal=str(changes.get("goal") or "Goal"), + body=str(changes.get("body") or "# Updated"), + state=str(changes.get("state") or "shaping"), + ) + + def acquire_lock(self, epic_id: str, holder_id: str, ttl_seconds: int) -> EpicLock: + if holder_id == "blocked": + raise LockConflict("held elsewhere") + return EpicLock( + epic_id=epic_id, + holder_id=holder_id, + expires_at=utc_now() + timedelta(seconds=ttl_seconds), + ) + + def release_lock(self, epic_id: str, holder_id: str) -> None: + return None + + def record_tool_call(self, **fields: 
object) -> ToolCall: + return ToolCall( + id="tool_1", + turn_id=str(fields["turn_id"]), + tool_name=str(fields["tool_name"]), + operation_kind=str(fields["operation_kind"]), + arguments=dict(fields["arguments"]), + result=dict(fields["result"]), + duration_ms=int(fields["duration_ms"]), + ) + + def log_system_event(self, **fields: object) -> SystemLog: + return SystemLog( + id="log_1", + level=str(fields["level"]), + category=str(fields["category"]), + event_type=str(fields["event_type"]), + message=str(fields["message"]), + details=dict(fields.get("details") or {}), + turn_id=fields.get("turn_id"), + epic_id=fields.get("epic_id"), + ) + + def load_hot_context(self, epic_id: str | None) -> HotContext: + return HotContext( + epic=Epic( + id="epic_1", + title="Title", + goal="Goal", + body="# Title", + state="shaping", + ) + if epic_id + else None, + recent_messages=list(self.messages.values()), + ) + + +def test_store_protocol_includes_refined_sprint_1_surface() -> None: + expected_store_methods = { + "transaction", + "load_body", + "update_body", + "seed_checklist", + "set_sprint_queue", + "load_message", + "record_tool_call", + "log_system_event", + "acquire_lock", + "release_lock", + "create_turn", + "create_message", + "load_hot_context", + "create_plan", + "write_plan_artifact", + "acquire_execution_lease", + "put_control_message", + "claim_pending_control_messages", + "append_progress_event", + } + assert expected_store_methods.issubset(_public_methods(Store)) + assert _public_methods(BlobStore) == {"put", "get", "url", "delete", "stat"} + + +def test_arnold_store_adapter_covers_live_arnold_store_surface() -> None: + arnold_ports = _load_arnold_ports() + missing = _public_methods(arnold_ports.Store) - _public_methods(ArnoldStoreAdapter) + assert not missing + + +def test_arnold_store_adapter_preserves_bootstrap_seed_and_lock_compatibility() -> None: + adapter = ArnoldStoreAdapter(_FakeStore()) + + with adapter.transaction(): + pass + assert adapter._store.transaction_epic_ids == [None] + + message = adapter.create_message(epic_id=None, direction="inbound", content="bootstrap") + assert message["epic_id"] is None + assert adapter.load_message(message["id"])["content"] == "bootstrap" + + turn = adapter.create_turn(epic_id=None, triggered_by_message_ids=[], prompt_snapshot={"phase": "bootstrap"}) + assert turn["epic_id"] is None + + updated = adapter.update_epic("epic_1", body="# Body", title="Body Title", goal="Body Goal") + assert updated["body"] == "# Body" + + seeded = adapter.seed_checklist("epic_1", ["First", "Second"]) + assert [item["position"] for item in seeded] == [1, 2] + assert [item["source"] for item in seeded] == ["default_seed", "default_seed"] + + tool_call = adapter.record_tool_call( + turn_id="turn_1", + tool_name="edit_epic", + operation_kind="write", + arguments={"body": "# Body"}, + result={"ok": True}, + duration_ms=1, + ) + assert tool_call["arguments"]["body"] == "# Body" + + log = adapter.log_system_event( + level="info", + category="system", + event_type="bootstrap", + message="ok", + details={"ok": True}, + epic_id=None, + ) + assert log["details"]["ok"] is True + + assert adapter.acquire_epic_lock("epic_1", holder_id="holder_a") is True + assert adapter.acquire_epic_lock("epic_1", holder_id="blocked") is False + assert adapter.load_hot_context(None)["epic"] is None diff --git a/tests/test_store_layout.py b/tests/test_store_layout.py new file mode 100644 index 00000000..ae5ee896 --- /dev/null +++ b/tests/test_store_layout.py @@ -0,0 +1,26 @@ +from __future__ 
import annotations + +from importlib import import_module +from typing import get_args + +from pydantic import BaseModel + + +def test_megaplan_schemas_keeps_legacy_exports() -> None: + schemas = import_module("megaplan.schemas") + + assert "plan.json" in schemas.SCHEMAS + assert callable(schemas.strict_schema) + assert callable(schemas.get_execution_schema_key) + assert issubclass(schemas.StorageModel, BaseModel) + + +def test_store_package_exports_sprint_1_seams() -> None: + store = import_module("megaplan.store") + + assert get_args(store.Backend) == ("file", "db") + assert store.Store.__module__ == "megaplan.store.base" + assert store.BlobStore.__module__ == "megaplan.store.blob" + assert store.FileStore.__module__ == "megaplan.store.file" + assert store.DBStore.__module__ == "megaplan.store.db" + assert store.PlanRepository.__module__ == "megaplan.store.plan_repository"
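+    # these module pins are intentional: relocating a Sprint 1 seam should fail
+    # here loudly rather than silently reshaping the package layout.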