From 21a5d669d4dfdd0c91edc1fa6ccba33f47e64434 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 8 Jun 2026 22:20:48 +0000
Subject: [PATCH 1/2] fix: increase agent execute and chat completions timeouts
 to 600s

- Agent completions/async_completions: enforce min 600s timeout for OpenAI client
- Executions: increase HTTP request timeout from 120s to 600s
- Executions.wait: increase default polling timeout from 300s to 600s
- Predictions.wait: increase default polling timeout from 300s to 600s

Orion agent executions and chat completions can take several minutes,
especially for complex tasks. Previous timeouts (120-300s) caused
premature errors and retries.

Co-Authored-By: dinesh@vlm.run <dinesh.andromeda@gmail.com>
---
 vlmrun/client/agent.py       | 4 ++--
 vlmrun/client/executions.py  | 4 ++--
 vlmrun/client/predictions.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vlmrun/client/agent.py b/vlmrun/client/agent.py
index 1453bfc..f46ff72 100644
--- a/vlmrun/client/agent.py
+++ b/vlmrun/client/agent.py
@@ -320,7 +320,7 @@ def completions(self):
         openai_client = OpenAI(
             api_key=self._client.api_key,
             base_url=base_url,
-            timeout=self._client.timeout,
+            timeout=max(self._client.timeout, 600),
             max_retries=self._client.max_retries,
         )
 
@@ -374,7 +374,7 @@ async def main():
         async_openai_client = AsyncOpenAI(
             api_key=self._client.api_key,
             base_url=base_url,
-            timeout=self._client.timeout,
+            timeout=max(self._client.timeout, 600),
             max_retries=self._client.max_retries,
         )
 
diff --git a/vlmrun/client/executions.py b/vlmrun/client/executions.py
index 0046939..4cae105 100644
--- a/vlmrun/client/executions.py
+++ b/vlmrun/client/executions.py
@@ -20,7 +20,7 @@ def __init__(self, client: "VLMRunProtocol") -> None:
             client: VLM Run API instance
         """
         self._client = client
-        self._requestor = APIRequestor(client, timeout=120)
+        self._requestor = APIRequestor(client, timeout=600)
 
     def list(self, skip: int = 0, limit: int = 10) -> list[AgentExecutionResponse]:
         """List all executions.
@@ -57,7 +57,7 @@ def get(self, id: str) -> AgentExecutionResponse:
         return AgentExecutionResponse(**response)
 
     def wait(
-        self, id: str, timeout: int = 300, sleep: int = 5
+        self, id: str, timeout: int = 600, sleep: int = 5
     ) -> AgentExecutionResponse:
         """Wait for execution to complete.
 
diff --git a/vlmrun/client/predictions.py b/vlmrun/client/predictions.py
index 06b577f..0988dac 100644
--- a/vlmrun/client/predictions.py
+++ b/vlmrun/client/predictions.py
@@ -129,7 +129,7 @@ def get(self, id: str) -> PredictionResponse:
 
         return prediction
 
-    def wait(self, id: str, timeout: int = 300, sleep: int = 5) -> PredictionResponse:
+    def wait(self, id: str, timeout: int = 600, sleep: int = 5) -> PredictionResponse:
         """Wait for prediction to complete.
 
         Args:

From a139779ad3499a008f6626b690225c37832a0d65 Mon Sep 17 00:00:00 2001
From: dineshreddy <dinesh@vlm.run>
Date: Mon, 8 Jun 2026 15:34:02 -0700
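Subject: [PATCH 2/2] Apply suggestions from code review

- Agent completions/async_completions: pass a `None` client timeout through
  unchanged instead of calling max(None, 600), which raises TypeError;
  non-None timeouts are still raised to a minimum of 600s
- Executions: restore the APIRequestor HTTP request timeout from 600s to 120s
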
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 vlmrun/client/agent.py      | 4 ++--
 vlmrun/client/executions.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vlmrun/client/agent.py b/vlmrun/client/agent.py
index f46ff72..a1ad669 100644
--- a/vlmrun/client/agent.py
+++ b/vlmrun/client/agent.py
@@ -320,7 +320,7 @@ def completions(self):
         openai_client = OpenAI(
             api_key=self._client.api_key,
             base_url=base_url,
-            timeout=max(self._client.timeout, 600),
+            timeout=self._client.timeout if self._client.timeout is None else max(self._client.timeout, 600),
             max_retries=self._client.max_retries,
         )
 
@@ -374,7 +374,7 @@ async def main():
         async_openai_client = AsyncOpenAI(
             api_key=self._client.api_key,
             base_url=base_url,
-            timeout=max(self._client.timeout, 600),
+            timeout=self._client.timeout if self._client.timeout is None else max(self._client.timeout, 600),
             max_retries=self._client.max_retries,
         )
 
diff --git a/vlmrun/client/executions.py b/vlmrun/client/executions.py
index 4cae105..04d2610 100644
--- a/vlmrun/client/executions.py
+++ b/vlmrun/client/executions.py
@@ -20,7 +20,7 @@ def __init__(self, client: "VLMRunProtocol") -> None:
             client: VLM Run API instance
         """
         self._client = client
-        self._requestor = APIRequestor(client, timeout=600)
+        self._requestor = APIRequestor(client, timeout=120)
 
     def list(self, skip: int = 0, limit: int = 10) -> list[AgentExecutionResponse]:
         """List all executions.