1313from typing import Any , cast
1414
1515from aignx .codegen .api .public_api import PublicApi
16+ from aignx .codegen .exceptions import NotFoundException
1617from aignx .codegen .exceptions import ServiceException
1718from aignx .codegen .models import (
1819 CustomMetadataUpdateRequest ,
4243 Retrying ,
4344 retry_if_exception_type ,
4445 stop_after_attempt ,
46+ stop_after_delay ,
4547 wait_exponential_jitter ,
4648)
4749from urllib3 .exceptions import IncompleteRead , PoolError , ProtocolError , ProxyError
@@ -137,7 +139,8 @@ def for_run_id(cls, run_id: str, cache_token: bool = True) -> "Run":
137139 def details (self , nocache : bool = False , hide_platform_queue_position : bool = False ) -> RunData :
138140 """Retrieves the current status of the application run.
139141
140- Retries on network and server errors.
142+ Retries on network and server errors. Additionally retries on
143+ NotFoundException for up to 5 seconds to handle read replica lag.
141144
142145 Args:
143146 nocache (bool): If True, skip reading from cache and fetch fresh data from the API.
@@ -149,24 +152,37 @@ def details(self, nocache: bool = False, hide_platform_queue_position: bool = Fa
149152 RunData: The run data.
150153
151154 Raises:
155+ NotFoundException: If the run is not found after retries.
152156 Exception: If the API request fails.
153157 """
154158
@cached_operation(ttl=settings().run_cache_ttl, use_token=True)
def details_with_retry(run_id: str) -> RunData:
    """Fetch the run from the API behind two nested retry layers.

    The inner layer retries exceptions listed in RETRYABLE_EXCEPTIONS, bounded
    by the configured attempt count and backoff settings. The outer layer
    retries only NotFoundException and stops once 5 seconds have elapsed.
    Both layers use ``reraise=True``, so the last underlying exception is
    re-raised to the caller.

    Args:
        run_id (str): Identifier of the run to fetch.

    Returns:
        RunData: The value returned by the API call.
    """

    def _fetch() -> RunData:
        # A fresh retryer is built on every call, so each outer attempt
        # gets its own full budget of inner attempts.
        transient_retryer = Retrying(
            retry=retry_if_exception_type(exception_types=RETRYABLE_EXCEPTIONS),
            stop=stop_after_attempt(settings().run_retry_attempts),
            wait=wait_exponential_jitter(
                initial=settings().run_retry_wait_min,
                max=settings().run_retry_wait_max,
            ),
            before_sleep=_log_retry_attempt,
            reraise=True,
        )
        return transient_retryer(
            lambda: self._api.get_run_v1_runs_run_id_get(
                run_id,
                _request_timeout=settings().run_timeout,
                _headers={"User-Agent": user_agent()},
            )
        )

    # NOTE(nahua): Outer retry handles NotFoundException (read replica lag)
    not_found_retryer = Retrying(
        retry=retry_if_exception_type(exception_types=(NotFoundException,)),
        stop=stop_after_delay(5),
        wait=wait_exponential_jitter(initial=0.5, max=3),
        before_sleep=_log_retry_attempt,
        reraise=True,
    )
    return not_found_retryer(_fetch)
170186
171187 run_data : RunData = details_with_retry (self .run_id , nocache = nocache ) # type: ignore[call-arg]
172188 if hide_platform_queue_position :
0 commit comments