Skip to content

Commit 86ac741

Browse files
Centralize web batch/crawl get payload serialization
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent ff7ae5f commit 86ac741

File tree

7 files changed

+89
-25
lines changed

7 files changed

+89
-25
lines changed

hyperbrowser/client/managers/async_manager/web/batch_fetch.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
BatchFetchJobResponse,
99
POLLING_ATTEMPTS,
1010
)
11-
from ...serialization_utils import serialize_model_dump_to_dict
1211
from ...web_payload_utils import build_batch_fetch_start_payload
12+
from ...web_payload_utils import build_batch_fetch_get_params
1313
from ....polling import (
1414
build_fetch_operation_name,
1515
build_operation_name,
@@ -53,11 +53,7 @@ async def get_status(self, job_id: str) -> BatchFetchJobStatusResponse:
5353
async def get(
5454
self, job_id: str, params: Optional[GetBatchFetchJobParams] = None
5555
) -> BatchFetchJobResponse:
56-
params_obj = params or GetBatchFetchJobParams()
57-
query_params = serialize_model_dump_to_dict(
58-
params_obj,
59-
error_message="Failed to serialize batch fetch get params",
60-
)
56+
query_params = build_batch_fetch_get_params(params)
6157
response = await self._client.transport.get(
6258
self._client._build_url(f"/web/batch-fetch/{job_id}"),
6359
params=query_params,

hyperbrowser/client/managers/async_manager/web/crawl.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
WebCrawlJobResponse,
99
POLLING_ATTEMPTS,
1010
)
11-
from ...serialization_utils import serialize_model_dump_to_dict
1211
from ...web_payload_utils import build_web_crawl_start_payload
12+
from ...web_payload_utils import build_web_crawl_get_params
1313
from ....polling import (
1414
build_fetch_operation_name,
1515
build_operation_name,
@@ -51,11 +51,7 @@ async def get_status(self, job_id: str) -> WebCrawlJobStatusResponse:
5151
async def get(
5252
self, job_id: str, params: Optional[GetWebCrawlJobParams] = None
5353
) -> WebCrawlJobResponse:
54-
params_obj = params or GetWebCrawlJobParams()
55-
query_params = serialize_model_dump_to_dict(
56-
params_obj,
57-
error_message="Failed to serialize web crawl get params",
58-
)
54+
query_params = build_web_crawl_get_params(params)
5955
response = await self._client.transport.get(
6056
self._client._build_url(f"/web/crawl/{job_id}"),
6157
params=query_params,

hyperbrowser/client/managers/sync_manager/web/batch_fetch.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
BatchFetchJobResponse,
99
POLLING_ATTEMPTS,
1010
)
11-
from ...serialization_utils import serialize_model_dump_to_dict
1211
from ...web_payload_utils import build_batch_fetch_start_payload
12+
from ...web_payload_utils import build_batch_fetch_get_params
1313
from ....polling import (
1414
build_fetch_operation_name,
1515
build_operation_name,
@@ -51,11 +51,7 @@ def get_status(self, job_id: str) -> BatchFetchJobStatusResponse:
5151
def get(
5252
self, job_id: str, params: Optional[GetBatchFetchJobParams] = None
5353
) -> BatchFetchJobResponse:
54-
params_obj = params or GetBatchFetchJobParams()
55-
query_params = serialize_model_dump_to_dict(
56-
params_obj,
57-
error_message="Failed to serialize batch fetch get params",
58-
)
54+
query_params = build_batch_fetch_get_params(params)
5955
response = self._client.transport.get(
6056
self._client._build_url(f"/web/batch-fetch/{job_id}"),
6157
params=query_params,

hyperbrowser/client/managers/sync_manager/web/crawl.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@
88
WebCrawlJobResponse,
99
POLLING_ATTEMPTS,
1010
)
11-
from ...serialization_utils import serialize_model_dump_to_dict
1211
from ...web_payload_utils import build_web_crawl_start_payload
12+
from ...web_payload_utils import build_web_crawl_get_params
1313
from ....polling import (
1414
build_fetch_operation_name,
1515
build_operation_name,
@@ -51,11 +51,7 @@ def get_status(self, job_id: str) -> WebCrawlJobStatusResponse:
5151
def get(
5252
self, job_id: str, params: Optional[GetWebCrawlJobParams] = None
5353
) -> WebCrawlJobResponse:
54-
params_obj = params or GetWebCrawlJobParams()
55-
query_params = serialize_model_dump_to_dict(
56-
params_obj,
57-
error_message="Failed to serialize web crawl get params",
58-
)
54+
query_params = build_web_crawl_get_params(params)
5955
response = self._client.transport.get(
6056
self._client._build_url(f"/web/crawl/{job_id}"),
6157
params=query_params,

hyperbrowser/client/managers/web_payload_utils.py

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
1-
from typing import Any, Dict
1+
from typing import Any, Dict, Optional
22

33
from hyperbrowser.models import (
44
FetchParams,
5+
GetBatchFetchJobParams,
6+
GetWebCrawlJobParams,
57
StartBatchFetchJobParams,
68
StartWebCrawlJobParams,
79
WebSearchParams,
@@ -43,3 +45,23 @@ def build_web_crawl_start_payload(params: StartWebCrawlJobParams) -> Dict[str, A
4345
)
4446
inject_web_output_schemas(payload, params.outputs.formats if params.outputs else None)
4547
return payload
48+
49+
50+
def build_batch_fetch_get_params(
51+
params: Optional[GetBatchFetchJobParams] = None,
52+
) -> Dict[str, Any]:
53+
params_obj = params or GetBatchFetchJobParams()
54+
return serialize_model_dump_to_dict(
55+
params_obj,
56+
error_message="Failed to serialize batch fetch get params",
57+
)
58+
59+
60+
def build_web_crawl_get_params(
61+
params: Optional[GetWebCrawlJobParams] = None,
62+
) -> Dict[str, Any]:
63+
params_obj = params or GetWebCrawlJobParams()
64+
return serialize_model_dump_to_dict(
65+
params_obj,
66+
error_message="Failed to serialize web crawl get params",
67+
)

tests/test_web_payload_helper_usage.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,15 @@ def test_batch_fetch_managers_use_shared_start_payload_helper():
3434
for module_path in BATCH_FETCH_MANAGER_MODULES:
3535
module_text = Path(module_path).read_text(encoding="utf-8")
3636
assert "build_batch_fetch_start_payload(" in module_text
37+
assert "build_batch_fetch_get_params(" in module_text
3738
assert "inject_web_output_schemas(" not in module_text
39+
assert "serialize_model_dump_to_dict(" not in module_text
3840

3941

4042
def test_web_crawl_managers_use_shared_start_payload_helper():
4143
for module_path in WEB_CRAWL_MANAGER_MODULES:
4244
module_text = Path(module_path).read_text(encoding="utf-8")
4345
assert "build_web_crawl_start_payload(" in module_text
46+
assert "build_web_crawl_get_params(" in module_text
4447
assert "inject_web_output_schemas(" not in module_text
48+
assert "serialize_model_dump_to_dict(" not in module_text

tests/test_web_payload_utils.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
from hyperbrowser.models import (
66
FetchOutputOptions,
77
FetchParams,
8+
GetBatchFetchJobParams,
9+
GetWebCrawlJobParams,
810
StartBatchFetchJobParams,
911
StartWebCrawlJobParams,
1012
WebSearchParams,
@@ -85,6 +87,34 @@ def test_build_web_crawl_start_payload_returns_serialized_payload():
8587
assert payload["url"] == "https://example.com"
8688

8789

90+
def test_build_batch_fetch_get_params_returns_serialized_payload():
91+
payload = web_payload_utils.build_batch_fetch_get_params(
92+
GetBatchFetchJobParams(page=2, batch_size=50)
93+
)
94+
95+
assert payload == {"page": 2, "batchSize": 50}
96+
97+
98+
def test_build_batch_fetch_get_params_uses_default_params():
99+
payload = web_payload_utils.build_batch_fetch_get_params()
100+
101+
assert payload == {}
102+
103+
104+
def test_build_web_crawl_get_params_returns_serialized_payload():
105+
payload = web_payload_utils.build_web_crawl_get_params(
106+
GetWebCrawlJobParams(page=3, batch_size=25)
107+
)
108+
109+
assert payload == {"page": 3, "batchSize": 25}
110+
111+
112+
def test_build_web_crawl_get_params_uses_default_params():
113+
payload = web_payload_utils.build_web_crawl_get_params()
114+
115+
assert payload == {}
116+
117+
88118
def test_build_web_crawl_start_payload_invokes_schema_injection(
89119
monkeypatch: pytest.MonkeyPatch,
90120
):
@@ -155,3 +185,27 @@ def model_dump(self, **kwargs): # noqa: ARG002
155185
web_payload_utils.build_web_crawl_start_payload(_BrokenWebCrawlParams()) # type: ignore[arg-type]
156186

157187
assert exc_info.value.original_error is None
188+
189+
190+
def test_build_batch_fetch_get_params_wraps_runtime_model_dump_failures():
191+
class _BrokenBatchFetchGetParams:
192+
def model_dump(self, **kwargs): # noqa: ARG002
193+
raise RuntimeError("boom")
194+
195+
with pytest.raises(
196+
HyperbrowserError, match="Failed to serialize batch fetch get params"
197+
) as exc_info:
198+
web_payload_utils.build_batch_fetch_get_params(_BrokenBatchFetchGetParams()) # type: ignore[arg-type]
199+
200+
assert isinstance(exc_info.value.original_error, RuntimeError)
201+
202+
203+
def test_build_web_crawl_get_params_preserves_hyperbrowser_model_dump_failures():
204+
class _BrokenWebCrawlGetParams:
205+
def model_dump(self, **kwargs): # noqa: ARG002
206+
raise HyperbrowserError("custom dump failure")
207+
208+
with pytest.raises(HyperbrowserError, match="custom dump failure") as exc_info:
209+
web_payload_utils.build_web_crawl_get_params(_BrokenWebCrawlGetParams()) # type: ignore[arg-type]
210+
211+
assert exc_info.value.original_error is None

0 commit comments

Comments
 (0)