From 21a5d669d4dfdd0c91edc1fa6ccba33f47e64434 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 8 Jun 2026 22:20:48 +0000 Subject: [PATCH 1/2] fix: increase agent execute and chat completions timeouts to 600s - Agent completions/async_completions: enforce min 600s timeout for OpenAI client - Executions: increase HTTP request timeout from 120s to 600s - Executions.wait: increase default polling timeout from 300s to 600s - Predictions.wait: increase default polling timeout from 300s to 600s Orion agent executions and chat completions can take several minutes, especially for complex tasks. Previous timeouts (120-300s) caused premature errors and retries. Co-Authored-By: dinesh@vlm.run --- vlmrun/client/agent.py | 4 ++-- vlmrun/client/executions.py | 4 ++-- vlmrun/client/predictions.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vlmrun/client/agent.py b/vlmrun/client/agent.py index 1453bfc..f46ff72 100644 --- a/vlmrun/client/agent.py +++ b/vlmrun/client/agent.py @@ -320,7 +320,7 @@ def completions(self): openai_client = OpenAI( api_key=self._client.api_key, base_url=base_url, - timeout=self._client.timeout, + timeout=max(self._client.timeout, 600), max_retries=self._client.max_retries, ) @@ -374,7 +374,7 @@ async def main(): async_openai_client = AsyncOpenAI( api_key=self._client.api_key, base_url=base_url, - timeout=self._client.timeout, + timeout=max(self._client.timeout, 600), max_retries=self._client.max_retries, ) diff --git a/vlmrun/client/executions.py b/vlmrun/client/executions.py index 0046939..4cae105 100644 --- a/vlmrun/client/executions.py +++ b/vlmrun/client/executions.py @@ -20,7 +20,7 @@ def __init__(self, client: "VLMRunProtocol") -> None: client: VLM Run API instance """ self._client = client - self._requestor = APIRequestor(client, timeout=120) + self._requestor = APIRequestor(client, timeout=600) def list(self, skip: int = 0, limit: int = 10) -> list[AgentExecutionResponse]: """List all executions. @@ -57,7 +57,7 @@ def get(self, id: str) -> AgentExecutionResponse: return AgentExecutionResponse(**response) def wait( - self, id: str, timeout: int = 300, sleep: int = 5 + self, id: str, timeout: int = 600, sleep: int = 5 ) -> AgentExecutionResponse: """Wait for execution to complete. diff --git a/vlmrun/client/predictions.py b/vlmrun/client/predictions.py index 06b577f..0988dac 100644 --- a/vlmrun/client/predictions.py +++ b/vlmrun/client/predictions.py @@ -129,7 +129,7 @@ def get(self, id: str) -> PredictionResponse: return prediction - def wait(self, id: str, timeout: int = 300, sleep: int = 5) -> PredictionResponse: + def wait(self, id: str, timeout: int = 600, sleep: int = 5) -> PredictionResponse: """Wait for prediction to complete. Args: From a139779ad3499a008f6626b690225c37832a0d65 Mon Sep 17 00:00:00 2001 From: dineshreddy Date: Mon, 8 Jun 2026 15:34:02 -0700 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vlmrun/client/agent.py | 4 ++-- vlmrun/client/executions.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vlmrun/client/agent.py b/vlmrun/client/agent.py index f46ff72..a1ad669 100644 --- a/vlmrun/client/agent.py +++ b/vlmrun/client/agent.py @@ -320,7 +320,7 @@ def completions(self): openai_client = OpenAI( api_key=self._client.api_key, base_url=base_url, - timeout=max(self._client.timeout, 600), + timeout=self._client.timeout if self._client.timeout is None else max(self._client.timeout, 600), max_retries=self._client.max_retries, ) @@ -374,7 +374,7 @@ async def main(): async_openai_client = AsyncOpenAI( api_key=self._client.api_key, base_url=base_url, - timeout=max(self._client.timeout, 600), + timeout=self._client.timeout if self._client.timeout is None else max(self._client.timeout, 600), max_retries=self._client.max_retries, ) diff --git a/vlmrun/client/executions.py b/vlmrun/client/executions.py index 4cae105..04d2610 100644 --- a/vlmrun/client/executions.py +++ b/vlmrun/client/executions.py @@ -20,7 +20,7 @@ def __init__(self, client: "VLMRunProtocol") -> None: client: VLM Run API instance """ self._client = client - self._requestor = APIRequestor(client, timeout=600) + self._requestor = APIRequestor(client, timeout=120) def list(self, skip: int = 0, limit: int = 10) -> list[AgentExecutionResponse]: """List all executions.