fix(resume): persist all LLM calls, enforce oldest-first ordering, and correctly copy reused stage outputs

lufftw · lufftw · commit 4a5ecf909a40 · 2025-12-15T12:17:10.000+08:00
This change fixes multiple critical issues in resume/debug workflows and
formalizes the regeneration model.

Key improvements:

- Ensure *all* LLM API calls are persisted (input + output), in both
  normal-run and resume modes.
  - Fixed missing persistence in base_agent.ainvoke()
  - Added persistence for compressor.compress_content() and
    compressor._summarize_history()

- Change previous-run listing order from newest-first to oldest-first,
  matching execution and print order semantics.

- Enforce a strict regeneration principle:
  - Original run directories are treated as read-only.
  - All outputs (generated or reused) are written into the new regen directory.
  - Reused stages copy *all related files* (final outputs + LLM input/output)
    from the original run into the new directory.

- Implement precise stage file identification to support correct reuse prompts:
  - expert_review: LLM invoke/review files for expert agents
  - full_discussion: LLM discuss files for expert agents
  - consensus: consensus-related files
  - writer: writer-related LLM and output files

- Improve resume behavior:
  - If output for a later stage already exists in the original run and
    no reuse decision has been made for that stage yet, the system
    prompts at run time whether to reuse or regenerate it.
  - Reused stages are still fully materialized in the regen directory for
    traceability and comparison.

Result:
Each regen run is now a complete, self-contained, and auditable record,
while original runs remain immutable.
diff --git a/tools/ai-markmap-agent/src/graph.py b/tools/ai-markmap-agent/src/graph.py
@@ -334,9 +334,29 @@ def run_expert_review(state: WorkflowState) -> WorkflowState:
         
         # Check if we should skip this phase (resume mode)
         resume_config = state.get("_resume_config", {})
-        if resume_config and resume_config.get("reuse_stages", {}).get("expert_review"):
-            print("  ⏭️  Skipping (reusing from previous run)")
-            return state
+        if resume_config:
+            reuse_stages = resume_config.get("reuse_stages", {})
+            if reuse_stages.get("expert_review"):
+                print("  ⏭️  Reusing expert_review from previous run")
+                # Copy all files related to expert_review to new directory
+                resume_run_dir = Path(resume_config["run_dir"])
+                prev_run = RunInfo(resume_run_dir)
+                debug = get_debug_manager(config)
+                if debug.enabled:
+                    import shutil
+                    # Copy all expert_review related files
+                    expert_review_files = prev_run.get_stage_files("expert_review")
+                    if expert_review_files:
+                        for file_info in expert_review_files:
+                            try:
+                                dest = debug.run_dir / file_info["filename"]
+                                shutil.copy2(file_info["path"], dest)
+                                print(f"  💾 Copied: {file_info['filename']}")
+                            except Exception as e:
+                                print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                    else:
+                        print("  ⚠ No expert_review files found in previous run")
+                return state
         
         debug = get_debug_manager(config)
         
@@ -382,9 +402,29 @@ def run_full_discussion(state: WorkflowState) -> WorkflowState:
         
         # Check if we should skip this phase (resume mode)
         resume_config = state.get("_resume_config", {})
-        if resume_config and resume_config.get("reuse_stages", {}).get("full_discussion"):
-            print("  ⏭️  Skipping (reusing from previous run)")
-            return state
+        if resume_config:
+            reuse_stages = resume_config.get("reuse_stages", {})
+            if reuse_stages.get("full_discussion"):
+                print("  ⏭️  Reusing full_discussion from previous run")
+                # Copy all files related to full_discussion to new directory
+                resume_run_dir = Path(resume_config["run_dir"])
+                prev_run = RunInfo(resume_run_dir)
+                debug = get_debug_manager(config)
+                if debug.enabled:
+                    import shutil
+                    # Copy all full_discussion related files
+                    discussion_files = prev_run.get_stage_files("full_discussion")
+                    if discussion_files:
+                        for file_info in discussion_files:
+                            try:
+                                dest = debug.run_dir / file_info["filename"]
+                                shutil.copy2(file_info["path"], dest)
+                                print(f"  💾 Copied: {file_info['filename']}")
+                            except Exception as e:
+                                print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                    else:
+                        print("  ⚠ No full_discussion files found in previous run")
+                return state
         
         debug = get_debug_manager(config)
         
@@ -426,10 +466,79 @@ def run_consensus(state: WorkflowState) -> WorkflowState:
         
         # Check if we should skip this phase (resume mode)
         resume_config = state.get("_resume_config", {})
-        if resume_config and resume_config.get("reuse_stages", {}).get("consensus"):
-            print("  ⏭️  Skipping (reusing from previous run)")
-            # Consensus should already be loaded in initialize()
-            return state
+        if resume_config:
+            reuse_stages = resume_config.get("reuse_stages", {})
+            
+            resume_run_dir = Path(resume_config["run_dir"])
+            prev_run = RunInfo(resume_run_dir)
+            
+            # If explicitly marked to reuse, load it
+            if reuse_stages.get("consensus"):
+                print("  ⏭️  Reusing consensus from previous run")
+                # Consensus should already be loaded in initialize()
+                # Copy all consensus files to new directory
+                debug = get_debug_manager(config)
+                if debug.enabled:
+                    import shutil
+                    # Copy all consensus related files
+                    consensus_files = prev_run.get_stage_files("consensus")
+                    if consensus_files:
+                        for file_info in consensus_files:
+                            try:
+                                dest = debug.run_dir / file_info["filename"]
+                                shutil.copy2(file_info["path"], dest)
+                                print(f"  💾 Copied: {file_info['filename']}")
+                            except Exception as e:
+                                print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                    # Also save consensus data if available in state
+                    if "consensus_result" in state:
+                        consensus_result = state["consensus_result"]
+                        consensus_data = {
+                            "adopted": consensus_result.adopted,
+                            "rejected": consensus_result.rejected,
+                            "vote_counts": consensus_result.vote_counts,
+                            "threshold": consensus_threshold,
+                            "_reused_from": prev_run.run_id,
+                        }
+                        debug.save_consensus(consensus_data)
+                return state
+            
+            # If not in reuse list but output exists, ask user
+            if prev_run.has_stage_output("consensus") and "consensus" not in reuse_stages:
+                from ..resume import ask_reuse_stage
+                should_reuse = ask_reuse_stage("consensus", prev_run)
+                if should_reuse:
+                    consensus_data = load_consensus_from_run(prev_run)
+                    if consensus_data:
+                        from .consensus import ConsensusResult
+                        state["consensus_result"] = ConsensusResult(
+                            adopted=consensus_data.get("adopted", []),
+                            rejected=consensus_data.get("rejected", []),
+                            vote_counts=consensus_data.get("vote_counts", {}),
+                            required_votes=0,
+                            num_experts=0,
+                        )
+                        print("  ✓ Loaded consensus from previous run")
+                        # Copy all consensus files to new directory
+                        debug = get_debug_manager(config)
+                        if debug.enabled:
+                            import shutil
+                            # Copy all consensus related files
+                            consensus_files = prev_run.get_stage_files("consensus")
+                            if consensus_files:
+                                for file_info in consensus_files:
+                                    try:
+                                        dest = debug.run_dir / file_info["filename"]
+                                        shutil.copy2(file_info["path"], dest)
+                                        print(f"  💾 Copied: {file_info['filename']}")
+                                    except Exception as e:
+                                        print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                            # Also save consensus data
+                            consensus_data["_reused_from"] = prev_run.run_id
+                            debug.save_consensus(consensus_data)
+                        # Mark as reused so we don't ask again
+                        reuse_stages["consensus"] = True
+                        return state
         
         debug = get_debug_manager(config)
         
@@ -483,17 +592,65 @@ def run_writer(state: WorkflowState) -> WorkflowState:
         
         # Check if we should reuse writer output (resume mode)
         resume_config = state.get("_resume_config", {})
-        if resume_config and resume_config.get("reuse_stages", {}).get("writer"):
-            print("  ⏭️  Reusing writer output from previous run")
+        if resume_config:
+            reuse_stages = resume_config.get("reuse_stages", {})
             resume_run_dir = Path(resume_config["run_dir"])
-            writer_output = load_writer_output_from_run(RunInfo(resume_run_dir))
-            if writer_output:
-                state["final_markmap"] = writer_output
-                state["writer_outputs"]["general_en"] = writer_output
-                print(f"  ✓ Loaded writer output ({len(writer_output)} chars)")
+            prev_run = RunInfo(resume_run_dir)
+            
+            # If explicitly marked to reuse, load it
+            if reuse_stages.get("writer"):
+                print("  ⏭️  Reusing writer from previous run")
+                # Copy all writer related files to new directory
+                debug = get_debug_manager(config)
+                if debug.enabled:
+                    import shutil
+                    # Copy all writer related files (LLM input/output, writer output)
+                    writer_files = prev_run.get_stage_files("writer")
+                    if writer_files:
+                        for file_info in writer_files:
+                            try:
+                                dest = debug.run_dir / file_info["filename"]
+                                shutil.copy2(file_info["path"], dest)
+                                print(f"  💾 Copied: {file_info['filename']}")
+                            except Exception as e:
+                                print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                    # Load writer output content for state
+                    writer_output = load_writer_output_from_run(prev_run)
+                    if writer_output:
+                        state["final_markmap"] = writer_output
+                        state["writer_outputs"]["general_en"] = writer_output
+                        print(f"  ✓ Loaded writer output ({len(writer_output)} chars)")
+                    else:
+                        print("  ⚠ Could not load writer output content")
                 return state
-            else:
-                print("  ⚠ Could not load writer output, regenerating...")
+            
+            # If not in reuse list but output exists, ask user
+            elif prev_run.has_stage_output("writer") and "writer" not in reuse_stages:
+                from ..resume import ask_reuse_stage
+                should_reuse = ask_reuse_stage("writer", prev_run)
+                if should_reuse:
+                    writer_output = load_writer_output_from_run(prev_run)
+                    if writer_output:
+                        state["final_markmap"] = writer_output
+                        state["writer_outputs"]["general_en"] = writer_output
+                        print(f"  ✓ Loaded writer output ({len(writer_output)} chars)")
+                        # Copy all writer files to new directory
+                        debug = get_debug_manager(config)
+                        if debug.enabled:
+                            import shutil
+                            # Copy all writer related files
+                            writer_files = prev_run.get_stage_files("writer")
+                            if writer_files:
+                                for file_info in writer_files:
+                                    try:
+                                        dest = debug.run_dir / file_info["filename"]
+                                        shutil.copy2(file_info["path"], dest)
+                                        print(f"  💾 Copied: {file_info['filename']}")
+                                    except Exception as e:
+                                        print(f"  ⚠ Failed to copy {file_info['filename']}: {e}")
+                        # Mark as reused
+                        reuse_stages["writer"] = True
+                        return state
         
         debug = get_debug_manager(config)
         
@@ -540,6 +697,17 @@ def run_translations(state: WorkflowState) -> WorkflowState:
             return state
         
         print("\n[Phase 5] Translating outputs...")
+        
+        # Check if we should skip this phase (resume mode)
+        resume_config = state.get("_resume_config", {})
+        if resume_config:
+            reuse_stages = resume_config.get("reuse_stages", {})
+            if reuse_stages.get("translate"):
+                print("  ⏭️  Skipping (reusing from previous run)")
+                # Translation outputs should be loaded from previous run
+                # TODO: Load translation outputs if needed
+                return state
+        
         debug = get_debug_manager(config)
         
         writer_outputs = state.get("writer_outputs", {})
diff --git a/tools/ai-markmap-agent/src/resume.py b/tools/ai-markmap-agent/src/resume.py
@@ -70,24 +70,36 @@ def _scan_files(self) -> dict[str, dict[str, Any]]:
                 "mtime_str": mtime.strftime("%Y-%m-%d %H:%M:%S"),
             }
             
-            # Categorize file
-            if filename.startswith("llm_input_"):
-                files_by_phase["llm_input"].append(file_info)
-            elif filename.startswith("llm_output_"):
-                files_by_phase["llm_output"].append(file_info)
-            elif "consensus" in filename:
+            # Categorize file - precise pattern matching
+            filename_lower = filename.lower()
+            
+            # Expert review: llm_input/output with invoke or review, for expert agents
+            if filename.startswith("llm_input_") or filename.startswith("llm_output_"):
+                if "invoke" in filename_lower or "review" in filename_lower:
+                    # Check if it's an expert agent
+                    if any(expert in filename_lower for expert in ["architect", "professor", "engineer", "optimizer"]):
+                        files_by_phase["expert_review"].append(file_info)
+                elif "discuss" in filename_lower:
+                    # Check if it's an expert agent
+                    if any(expert in filename_lower for expert in ["architect", "professor", "engineer", "optimizer"]):
+                        files_by_phase["full_discussion"].append(file_info)
+                elif "writer" in filename_lower:
+                    files_by_phase["writer"].append(file_info)
+                elif "translator" in filename_lower or "translation" in filename_lower:
+                    files_by_phase["translation"].append(file_info)
+                else:
+                    # Generic LLM input/output (add to both lists for backward compatibility)
+                    files_by_phase["llm_input"].append(file_info)
+                    if filename.startswith("llm_output_"):
+                        files_by_phase["llm_output"].append(file_info)
+            elif "consensus" in filename_lower:
                 files_by_phase["consensus"].append(file_info)
-            elif "writer" in filename:
+            elif "writer" in filename_lower:
                 files_by_phase["writer"].append(file_info)
-            elif "translation" in filename or "translator" in filename:
+            elif "translation" in filename_lower or "translator" in filename_lower:
                 files_by_phase["translation"].append(file_info)
-            elif "postproc" in filename or "post_processing" in filename:
+            elif "postproc" in filename_lower or "post_processing" in filename_lower:
                 files_by_phase["post_processing"].append(file_info)
-            elif "optimizer" in filename or "architect" in filename or "professor" in filename or "engineer" in filename:
-                if "discuss" in filename:
-                    files_by_phase["full_discussion"].append(file_info)
-                else:
-                    files_by_phase["expert_review"].append(file_info)
         
         return files_by_phase
     
@@ -101,38 +113,14 @@ def _format_size(self, size_bytes: int) -> str:
     
     def has_stage_output(self, stage: str) -> bool:
         """Check if this run has output for a specific stage."""
-        stage_map = {
-            "expert_review": ["expert_review", "llm_output"],
-            "full_discussion": ["full_discussion", "llm_output"],
-            "consensus": ["consensus"],
-            "writer": ["writer", "llm_output"],
-            "translation": ["translation", "llm_output"],
-            "post_processing": ["post_processing"],
-        }
-        
-        check_phases = stage_map.get(stage, [])
-        for phase in check_phases:
-            if self.files.get(phase):
-                return True
-        return False
+        # Check directly by stage name (files are categorized by stage)
+        stage_files = self.files.get(stage, [])
+        return len(stage_files) > 0
     
     def get_stage_files(self, stage: str) -> list[dict[str, Any]]:
         """Get files for a specific stage."""
-        stage_map = {
-            "expert_review": ["expert_review", "llm_output"],
-            "full_discussion": ["full_discussion", "llm_output"],
-            "consensus": ["consensus"],
-            "writer": ["writer", "llm_output"],
-            "translation": ["translation", "llm_output"],
-            "post_processing": ["post_processing"],
-        }
-        
-        all_files = []
-        check_phases = stage_map.get(stage, [])
-        for phase in check_phases:
-            all_files.extend(self.files.get(phase, []))
-        
-        return all_files
+        # Return files categorized for this specific stage
+        return self.files.get(stage, [])
 
 
 def scan_previous_runs(debug_output_dir: Path) -> list[RunInfo]: