Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions api/core/rag/extractor/watercrawl/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ class WaterCrawlBadRequestError(WaterCrawlError):
def __init__(self, response):
self.status_code = response.status_code
self.response = response
data = response.json()
self.message = data.get("message", "Unknown error occurred")
try:
data = response.json()
except (ValueError, json.JSONDecodeError):
data = {}
self.message = data.get("message") or (
response.text if hasattr(response, "text") and response.text else "Unknown error occurred"
)
self.errors = data.get("errors", {})
super().__init__(self.message)

Expand Down
4 changes: 3 additions & 1 deletion api/services/auth/jina/jina.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from core.helper.http_client_pooling import get_pooled_http_client
from services.auth.api_key_auth_base import ApiKeyAuthBase, AuthCredentials

_CREDENTIAL_VALIDATION_TIMEOUT = httpx.Timeout(10.0, connect=3.0)

_http_client: httpx.Client = get_pooled_http_client(
"auth:jina",
lambda: httpx.Client(limits=httpx.Limits(max_keepalive_connections=50, max_connections=100)),
Expand Down Expand Up @@ -39,7 +41,7 @@ def _prepare_headers(self):
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}

def _post_request(self, url, data, headers):
return _http_client.post(url, headers=headers, json=data)
return _http_client.post(url, headers=headers, json=data, timeout=_CREDENTIAL_VALIDATION_TIMEOUT)

def _handle_error(self, response):
if response.status_code in {402, 409, 500}:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,48 @@ def test_permission_and_authentication_error_strings(self):
assert "exceeding your WaterCrawl API limits" in str(permission)
assert "API key is invalid or expired" in str(authentication)

def test_bad_request_error_with_non_json_body_falls_back_to_text(self):
"""When the response body is not valid JSON, the exception must
fall back to ``response.text`` instead of leaking JSONDecodeError."""
response = _response(400, text="Internal Server Error")
response.json.side_effect = ValueError("Not JSON")

err = WaterCrawlBadRequestError(response)

assert err.message == "Internal Server Error"
assert err.errors == {}
assert "Internal Server Error" in str(err)

def test_bad_request_error_with_non_json_empty_body(self):
"""When the response body is not JSON and text is empty,
a generic message should be used."""
response = _response(400, text="")
response.json.side_effect = ValueError("Not JSON")

err = WaterCrawlBadRequestError(response)

assert err.message == "Unknown error occurred"
assert err.errors == {}

def test_permission_error_with_non_json_body(self):
"""WaterCrawlPermissionError must not raise JSONDecodeError on
non-JSON error responses."""
response = _response(403, text="Rate limit exceeded")
response.json.side_effect = ValueError("Not JSON")

err = WaterCrawlPermissionError(response)
assert "exceeding your WaterCrawl API limits" in str(err)
assert "Rate limit exceeded" in str(err)

def test_authentication_error_with_non_json_body(self):
"""WaterCrawlAuthenticationError must not raise JSONDecodeError on
non-JSON error responses."""
response = _response(401, text="Unauthorized")
response.json.side_effect = ValueError("Not JSON")

err = WaterCrawlAuthenticationError(response)
assert "API key is invalid or expired" in str(err)


class TestBaseAPIClient:
def test_init_session_builds_expected_headers(self, monkeypatch: pytest.MonkeyPatch):
Expand Down
21 changes: 20 additions & 1 deletion api/tests/unit_tests/services/auth/test_jina_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import httpx
import pytest

from services.auth.jina.jina import JinaAuth
from services.auth.jina.jina import JinaAuth, _CREDENTIAL_VALIDATION_TIMEOUT


class TestJinaAuth:
Expand Down Expand Up @@ -51,6 +51,7 @@ def test_should_validate_valid_credentials_successfully(self, mock_post):
"https://r.jina.ai",
headers={"Content-Type": "application/json", "Authorization": "Bearer test_api_key_123"},
json={"url": "https://example.com"},
timeout=_CREDENTIAL_VALIDATION_TIMEOUT,
)

@patch("services.auth.jina.jina._http_client.post", autospec=True)
Expand Down Expand Up @@ -185,3 +186,21 @@ def test_should_not_expose_api_key_in_error_messages(self):
with pytest.raises(ValueError) as exc_info:
JinaAuth({"auth_type": "basic", "config": {"api_key": "super_secret_key_12345"}})
assert "super_secret_key_12345" not in str(exc_info.value)

@patch("services.auth.jina.jina._http_client.post", autospec=True)
def test_should_pass_bounded_timeout_to_credential_validation(self, mock_post):
"""Credential validation must not use the default unbounded timeout."""
mock_response = MagicMock()
mock_response.status_code = 200
mock_post.return_value = mock_response

credentials = {"auth_type": "bearer", "config": {"api_key": "test_api_key_123"}}
auth = JinaAuth(credentials)
auth.validate_credentials()

call_kwargs = mock_post.call_args
assert "timeout" in call_kwargs.kwargs, "timeout keyword is missing from _post_request"
timeout = call_kwargs.kwargs["timeout"]
assert isinstance(timeout, httpx.Timeout)
assert timeout.connect == _CREDENTIAL_VALIDATION_TIMEOUT.connect
assert timeout.read == _CREDENTIAL_VALIDATION_TIMEOUT.read