# =============================================================================
# Translates Markmap content between languages.
# Prompts are loaded from prompts/translator/*.md files.
+#
+# Configuration:
+#   - translator_max_tokens: set in config.yaml under output.naming.languages.{lang}
+#     Example for gpt-5.2: translator_max_tokens: 128000 (max output capacity)
+#     Example for gpt-4o:  translator_max_tokens: 16384
+#     Default: 8192 if not specified
+#
+#   - Model specifications:
+#     * gpt-5.2: max output 128,000 tokens, context window 400,000 tokens
+#     * gpt-4o:  max output typically 16,384 tokens
+#     * gpt-4:   max output typically 8,192 tokens
+#
+# Error Handling:
+#   - All errors include request details (model, prompt size, tokens) for debugging
+#   - Debug output files are saved for API request/response inspection
# =============================================================================
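
For reference, a minimal sketch of the structure this header describes, written as the dict that ConfigLoader.get_config() would return. The language entries and concrete values here are illustrative, not from this commit:

    config = {
        "output": {
            "naming": {
                "languages": {
                    "de": {
                        "mode": "translate",
                        "translator_max_tokens": 16384,  # per-language output cap
                    },
                    # entries whose mode is not "translate" are skipped by the
                    # lookup below (mode value here is illustrative)
                    "en": {"mode": "source"},
                },
            },
        },
    }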

from __future__ import annotations
@@ -51,11 +66,29 @@ def __init__(

        config = config or ConfigLoader.get_config()

+        # Get max_tokens from config:
+        # check the language-specific translator config first.
+        naming = config.get("output", {}).get("naming", {})
+        languages_config = naming.get("languages", {})
+
+        max_tokens = None
+        if isinstance(languages_config, dict):
+            for lang, lang_settings in languages_config.items():
+                if lang_settings.get("mode") == "translate":
+                    # Check if this is the target language
+                    if lang == target_language:
+                        max_tokens = lang_settings.get("translator_max_tokens")
+                        break
+
+        # Fall back to the default if not configured
+        if max_tokens is None:
+            max_tokens = 8192  # Default for safety
+
        # Create model config for translator
        model_config = {
            "model": model,
            "temperature": 0.3,  # Lower temperature for translation accuracy
-            "max_tokens": 8192,
+            "max_tokens": max_tokens,
        }

        # Initialize prompt cache BEFORE super().__init__()
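
Since language keys are unique, the loop above amounts to a direct lookup on the target language. A standalone sketch of the same resolution; the function name resolve_max_tokens is illustrative, not part of this module:

    def resolve_max_tokens(config: dict, target_language: str) -> int:
        languages = config.get("output", {}).get("naming", {}).get("languages", {})
        settings = languages.get(target_language, {}) if isinstance(languages, dict) else {}
        if settings.get("mode") == "translate":
            configured = settings.get("translator_max_tokens")
            if configured is not None:
                return configured
        return 8192  # same safety default as the code above

    # With the sketch config from the header:
    #   resolve_max_tokens(config, "de") -> 16384
    #   resolve_max_tokens(config, "fr") -> 8192  (not configured)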
@@ -199,14 +232,41 @@ def translate(self, content: str, output_type: str) -> str:
        except Exception as e:
            elapsed = time.time() - start_time
            print(f" ❌ API call failed after {elapsed:.1f} seconds")
-            raise
+
+            # Enhanced error with request details for debugging
+            model_name = self.model_config.get("model", "unknown")
+            max_tokens = self.model_config.get("max_tokens", 8192)
+            estimated_input_tokens = prompt_size / 4
+
+            error_details = (
+                f"API request failed.\n"
+                f"  Request details:\n"
+                f"    Model: {model_name}\n"
+                f"    Source: {self.source_language} → Target: {self.target_language}\n"
+                f"    Prompt size: {prompt_size:,} chars (~{estimated_input_tokens:.0f} tokens)\n"
+                f"    Content size: {content_size:,} chars\n"
+                f"    Max output tokens: {max_tokens:,}\n"
+                f"    Elapsed time: {elapsed:.1f} seconds\n"
+                f"    Error: {type(e).__name__}: {str(e)}\n"
+                f"  Debug: Check saved LLM input file for full request details."
+            )
+            raise RuntimeError(error_details) from e

        # Validate response
        if response is None:
+            model_name = self.model_config.get("model", "unknown")
+            max_tokens = self.model_config.get("max_tokens", 8192)
+            estimated_input_tokens = prompt_size / 4
+
            raise ValueError(
-                f"LLM returned None response. "
-                f"Model: {self.model_config.get('model')}, "
-                f"Source: {self.source_language} → Target: {self.target_language}"
+                f"LLM returned None response.\n"
+                f"  Request details:\n"
+                f"    Model: {model_name}\n"
+                f"    Source: {self.source_language} → Target: {self.target_language}\n"
+                f"    Prompt size: {prompt_size:,} chars (~{estimated_input_tokens:.0f} tokens)\n"
+                f"    Content size: {content_size:,} chars\n"
+                f"    Max output tokens: {max_tokens:,}\n"
+                f"  Debug: Check saved LLM input file for full request details."
            )

        # Extract content from response
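
Two patterns in this hunk are worth noting: the ~4 chars/token input estimate (a rough heuristic for English-like text; real tokenizer counts vary by model and language), and raise ... from e, which keeps the original API exception attached as the new error's cause. A minimal runnable sketch with illustrative names:

    def call_api(prompt: str) -> str:
        raise TimeoutError("simulated API failure")

    prompt = "Translate this Markmap node. " * 100
    try:
        try:
            call_api(prompt)
        except Exception as e:
            estimated_input_tokens = len(prompt) / 4  # rough heuristic
            raise RuntimeError(
                f"API request failed.\n"
                f"    Prompt size: {len(prompt):,} chars (~{estimated_input_tokens:.0f} tokens)"
            ) from e
    except RuntimeError as err:
        print(err)
        print("caused by:", repr(err.__cause__))  # the TimeoutError survives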
@@ -228,11 +288,19 @@ def translate(self, content: str, output_type: str) -> str:

        # Validate content
        if content is None:
+            model_name = self.model_config.get("model", "unknown")
+            max_tokens = self.model_config.get("max_tokens", 8192)
+            estimated_input_tokens = prompt_size / 4
+
            raise ValueError(
-                f"LLM response content is None. "
-                f"Model: {self.model_config.get('model')}, "
-                f"Source: {self.source_language} → Target: {self.target_language}. "
-                f"Check API response in debug output files."
+                f"LLM response content is None.\n"
+                f"  Request details:\n"
+                f"    Model: {model_name}\n"
+                f"    Source: {self.source_language} → Target: {self.target_language}\n"
+                f"    Prompt size: {prompt_size:,} chars (~{estimated_input_tokens:.0f} tokens)\n"
+                f"    Content size: {content_size:,} chars\n"
+                f"    Max output tokens: {max_tokens:,}\n"
+                f"  Debug: Check saved LLM input/output files for full request/response details."
            )

        # Convert to string if needed
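
This check exists separately from the response is None check because a response object can be non-None while its content field is None. A sketch with an illustrative response shape; the actual extraction code is elided from this diff:

    # e.g. content filtering or a refusal can yield a well-formed response
    # whose message content is None (response shape is an assumption)
    response = {"choices": [{"message": {"content": None}}]}
    content = response["choices"][0]["message"]["content"]
    assert response is not None and content is None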
@@ -245,24 +313,27 @@ def translate(self, content: str, output_type: str) -> str:
        # Validate content is not empty
        if not content_str or len(content_str.strip()) == 0:
            model_name = self.model_config.get('model', 'unknown')
-            prompt_size = len(prompt) if 'prompt' in locals() else 0
            max_tokens = self.model_config.get("max_tokens", 8192)
-            estimated_tokens = prompt_size / 4 if prompt_size > 0 else 0
+            estimated_input_tokens = prompt_size / 4

            error_msg = (
                f"LLM returned empty response.\n"
-                f"  Model: {model_name}\n"
-                f"  Source: {self.source_language} → Target: {self.target_language}\n"
-                f"  Response length: {len(content_str)} chars\n"
-                f"  Prompt size: {prompt_size:,} chars (~{estimated_tokens:.0f} tokens, max_tokens: {max_tokens})\n"
-                f"  Debug output has been saved (check debug files for actual API response).\n"
+                f"  Request details:\n"
+                f"    Model: {model_name}\n"
+                f"    Source: {self.source_language} → Target: {self.target_language}\n"
+                f"    Prompt size: {prompt_size:,} chars (~{estimated_input_tokens:.0f} tokens)\n"
+                f"    Content size: {content_size:,} chars\n"
+                f"    Max output tokens: {max_tokens:,}\n"
+                f"    Response length: {len(content_str)} chars\n"
+                f"  Debug: Check saved LLM input/output files for full request/response details.\n"
                f"  Possible causes:\n"
                f"    1. Invalid model name '{model_name}' (verify it's a valid model for your API provider)\n"
-                f"    2. Prompt too large: {prompt_size:,} chars may exceed model context limit\n"
-                f"    3. API quota/rate limit exceeded\n"
-                f"    4. API returned empty content due to content filtering or safety checks\n"
-                f"    5. Prompt format issue causing model to reject the request\n"
-                f"    6. Network/API connection issue"
+                f"    2. Prompt too large: {prompt_size:,} chars (~{estimated_input_tokens:.0f} tokens) may exceed model context limit\n"
+                f"    3. Max tokens too small: {max_tokens:,} may be insufficient (check config.yaml translator_max_tokens)\n"
+                f"    4. API quota/rate limit exceeded\n"
+                f"    5. API returned empty content due to content filtering or safety checks\n"
+                f"    6. Prompt format issue causing model to reject the request\n"
+                f"    7. Network/API connection issue"
            )
            raise ValueError(error_msg)

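Taken together, every failure path in translate() now raises with the same "Request details" block. A caller-side sketch; the Translator name and constructor arguments are assumptions for illustration, and only translate() and the raised exception types come from this diff:

    import logging

    translator = Translator(target_language="de")
    try:
        result = translator.translate(markmap_text, output_type="markmap")
    except (RuntimeError, ValueError) as err:
        # One handler sees model, sizes, and token limits for any failure mode;
        # err.__cause__ still holds the original API exception when present.
        logging.error("Translation failed:\n%s", err)
        raise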