Add translation validation to graph.py to match translate_only.py

lufftw · lufftw · commit 6d62bbc08738 · 2025-12-15T14:04:40.000+08:00
- Validate translation result is not None
- Validate translation result is not empty/whitespace
- Validate cleaned content is not empty
- Improve error messages with context information
- Ensure consistent behavior between translate_only.py and graph.py
diff --git a/tools/ai-markmap-agent/src/agents/translator.py b/tools/ai-markmap-agent/src/agents/translator.py
@@ -81,10 +81,9 @@ def _load_translation_prompt(self, prompt_file: Path) -> str:
         """Load translation prompt from file with caching."""
         key = str(prompt_file)
         if key not in self._prompt_cache:
-            if prompt_file.exists():
-                self._prompt_cache[key] = prompt_file.read_text(encoding="utf-8")
-            else:
-                raise FileNotFoundError(f"Prompt file not found: {prompt_file}")
+            if not prompt_file.exists():
+                raise FileNotFoundError(f"Translation prompt file not found: {prompt_file}")
+            self._prompt_cache[key] = prompt_file.read_text(encoding="utf-8")
         return self._prompt_cache[key]
     
     def translate(self, content: str, output_type: str) -> str:
@@ -108,6 +107,14 @@ def translate(self, content: str, output_type: str) -> str:
                 self.target_language
             )
         
+        # Validate prompt template
+        if not prompt_template or len(prompt_template.strip()) == 0:
+            raise ValueError(
+                f"Translation prompt template is empty. "
+                f"Target language: {self.target_language}, "
+                f"Prompt file: {ZH_TW_PROMPT_FILE if self.target_language == 'zh-TW' else GENERIC_PROMPT_FILE}"
+            )
+        
         # Build full prompt with content
         prompt = f"""{prompt_template}
 
@@ -117,17 +124,63 @@ def translate(self, content: str, output_type: str) -> str:
 
 {content}"""
         
+        # Validate full prompt
+        if not prompt or len(prompt.strip()) == 0:
+            raise ValueError(
+                f"Built prompt is empty. "
+                f"Template length: {len(prompt_template)}, "
+                f"Content length: {len(content)}"
+            )
+        
         messages = self._build_messages(prompt)
         
         # Save LLM input
         self._save_llm_call_input(messages, "translate")
         
+        # Call LLM
         response = self.llm.invoke(messages)
         
+        # Validate response
+        if response is None:
+            raise ValueError(
+                f"LLM returned None response. "
+                f"Model: {self.model_config.get('model')}, "
+                f"Source: {self.source_language} → Target: {self.target_language}"
+            )
+        
+        # Extract content from response
+        # Handle different response types (AIMessage, str, etc.)
+        if hasattr(response, 'content'):
+            content = response.content
+        elif isinstance(response, str):
+            content = response
+        else:
+            # Fall back to the string representation of the response object
+            try:
+                content = str(response)
+            except Exception as e:
+                raise ValueError(
+                    f"Unable to extract content from response. "
+                    f"Response type: {type(response)}, "
+                    f"Error: {e}"
+                )
+        
+        # Validate content
+        if content is None:
+            raise ValueError(
+                f"LLM response content is None. "
+                f"Model: {self.model_config.get('model')}, "
+                f"Source: {self.source_language} → Target: {self.target_language}. "
+                f"Check API response in debug output files."
+            )
+        
+        # Convert to string if needed
+        content_str = str(content) if not isinstance(content, str) else content
+        
         # Save LLM output
-        self._save_llm_call_output(response.content, "translate")
+        self._save_llm_call_output(content_str, "translate")
         
-        return response.content
+        return content_str
 
 
 def create_translators(config: dict[str, Any] | None = None) -> list[dict[str, Any]]:
diff --git a/tools/ai-markmap-agent/src/graph.py b/tools/ai-markmap-agent/src/graph.py
@@ -766,16 +766,41 @@ def run_translations(state: WorkflowState) -> WorkflowState:
                         debug.save_translation(content, output_key, target_key, is_before=True)
                     
                     translated_content = translator.translate(content, "general")
+                    
+                    # Validate translation result
+                    if not translated_content:
+                        raise ValueError(
+                            f"Translation returned empty content (None). "
+                            f"Source: {source_lang} → Target: {target_lang}, "
+                            f"Model: {model}, Output: {output_key}"
+                        )
+                    if len(translated_content.strip()) == 0:
+                        raise ValueError(
+                            f"Translation returned only whitespace. "
+                            f"Source: {source_lang} → Target: {target_lang}, "
+                            f"Model: {model}, Output: {output_key}"
+                        )
+                    
                     # Clean up LLM artifacts
                     translated_content = clean_translated_content(translated_content)
+                    
+                    # Validate cleaned content
+                    if not translated_content or len(translated_content.strip()) == 0:
+                        raise ValueError(
+                            f"After cleaning, translation is empty. "
+                            f"Source: {source_lang} → Target: {target_lang}, "
+                            f"Model: {model}, Output: {output_key}"
+                        )
+                    
                     translated[target_key] = translated_content
                     print(f"  ✓ Translated: {output_key} → {target_key}")
                     
                     if debug.enabled:
                         debug.save_translation(translated_content, output_key, target_key, is_before=False)
                 except Exception as e:
-                    print(f"  ✗ Translation failed: {e}")
-                    state["errors"].append(f"Translation error: {e}")
+                    error_msg = f"Translation failed for {output_key} → {target_lang}: {e}"
+                    print(f"  ✗ {error_msg}")
+                    state["errors"].append(error_msg)
         
         state["translated_outputs"] = translated
         return state
diff --git a/tools/ai-markmap-agent/translate_only.py b/tools/ai-markmap-agent/translate_only.py
@@ -79,6 +79,10 @@ def translate_file(
     print(f"🌐 Translation: {source_lang} → {target_lang}")
     print(f"🤖 Model: {model}")
     
+    # Check if input file exists
+    if not input_path.exists():
+        raise FileNotFoundError(f"Input file not found: {input_path}")
+    
     # Read input
     content = input_path.read_text(encoding="utf-8")
     print(f"   Read {len(content)} chars, {len(content.splitlines())} lines")