Skip to content

Commit 8d69197

Browse files
Harden crawl and batch-scrape get param serialization
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent f679dc8 commit 8d69197

File tree

5 files changed

+273
-6
lines changed

5 files changed

+273
-6
lines changed

hyperbrowser/client/managers/async_manager/crawl.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,20 @@ async def get(
6060
self, job_id: str, params: Optional[GetCrawlJobParams] = None
6161
) -> CrawlJobResponse:
6262
params_obj = params or GetCrawlJobParams()
63+
try:
64+
query_params = params_obj.model_dump(exclude_none=True, by_alias=True)
65+
except HyperbrowserError:
66+
raise
67+
except Exception as exc:
68+
raise HyperbrowserError(
69+
"Failed to serialize crawl get params",
70+
original_error=exc,
71+
) from exc
72+
if type(query_params) is not dict:
73+
raise HyperbrowserError("Failed to serialize crawl get params")
6374
response = await self._client.transport.get(
6475
self._client._build_url(f"/crawl/{job_id}"),
65-
params=params_obj.model_dump(exclude_none=True, by_alias=True),
76+
params=query_params,
6677
)
6778
return parse_response_model(
6879
response.data,

hyperbrowser/client/managers/async_manager/scrape.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,20 @@ async def get(
6767
self, job_id: str, params: Optional[GetBatchScrapeJobParams] = None
6868
) -> BatchScrapeJobResponse:
6969
params_obj = params or GetBatchScrapeJobParams()
70+
try:
71+
query_params = params_obj.model_dump(exclude_none=True, by_alias=True)
72+
except HyperbrowserError:
73+
raise
74+
except Exception as exc:
75+
raise HyperbrowserError(
76+
"Failed to serialize batch scrape get params",
77+
original_error=exc,
78+
) from exc
79+
if type(query_params) is not dict:
80+
raise HyperbrowserError("Failed to serialize batch scrape get params")
7081
response = await self._client.transport.get(
7182
self._client._build_url(f"/scrape/batch/{job_id}"),
72-
params=params_obj.model_dump(exclude_none=True, by_alias=True),
83+
params=query_params,
7384
)
7485
return parse_response_model(
7586
response.data,

hyperbrowser/client/managers/sync_manager/crawl.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,20 @@ def get(
6060
self, job_id: str, params: Optional[GetCrawlJobParams] = None
6161
) -> CrawlJobResponse:
6262
params_obj = params or GetCrawlJobParams()
63+
try:
64+
query_params = params_obj.model_dump(exclude_none=True, by_alias=True)
65+
except HyperbrowserError:
66+
raise
67+
except Exception as exc:
68+
raise HyperbrowserError(
69+
"Failed to serialize crawl get params",
70+
original_error=exc,
71+
) from exc
72+
if type(query_params) is not dict:
73+
raise HyperbrowserError("Failed to serialize crawl get params")
6374
response = self._client.transport.get(
6475
self._client._build_url(f"/crawl/{job_id}"),
65-
params=params_obj.model_dump(exclude_none=True, by_alias=True),
76+
params=query_params,
6677
)
6778
return parse_response_model(
6879
response.data,

hyperbrowser/client/managers/sync_manager/scrape.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,9 +65,20 @@ def get(
6565
self, job_id: str, params: Optional[GetBatchScrapeJobParams] = None
6666
) -> BatchScrapeJobResponse:
6767
params_obj = params or GetBatchScrapeJobParams()
68+
try:
69+
query_params = params_obj.model_dump(exclude_none=True, by_alias=True)
70+
except HyperbrowserError:
71+
raise
72+
except Exception as exc:
73+
raise HyperbrowserError(
74+
"Failed to serialize batch scrape get params",
75+
original_error=exc,
76+
) from exc
77+
if type(query_params) is not dict:
78+
raise HyperbrowserError("Failed to serialize batch scrape get params")
6879
response = self._client.transport.get(
6980
self._client._build_url(f"/scrape/batch/{job_id}"),
70-
params=params_obj.model_dump(exclude_none=True, by_alias=True),
81+
params=query_params,
7182
)
7283
return parse_response_model(
7384
response.data,

tests/test_job_manager_serialization.py

Lines changed: 225 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,13 @@
2929
ScrapeManager as SyncScrapeManager,
3030
)
3131
from hyperbrowser.exceptions import HyperbrowserError
32-
from hyperbrowser.models.crawl import StartCrawlJobParams
32+
from hyperbrowser.models.crawl import GetCrawlJobParams, StartCrawlJobParams
3333
from hyperbrowser.models.extract import StartExtractJobParams
34-
from hyperbrowser.models.scrape import StartBatchScrapeJobParams, StartScrapeJobParams
34+
from hyperbrowser.models.scrape import (
35+
GetBatchScrapeJobParams,
36+
StartBatchScrapeJobParams,
37+
StartScrapeJobParams,
38+
)
3539

3640

3741
class _SyncTransport:
@@ -175,6 +179,49 @@ def _build_url(self, path: str) -> str:
175179
),
176180
)
177181

182+
# Shape of one parametrized "get" case. The slots line up with the
# parametrize argument string used by the get-serialization tests:
#   "_, manager_class, params_class, build_params, expected_error"
_SyncGetCase = Tuple[
    str,  # case id (also used as the pytest id)
    Type[Any],  # manager class under test
    Type[Any],  # params model class whose ``model_dump`` gets patched
    Callable[[], Any],  # factory building the params instance passed to ``get``
    str,  # error message expected from the manager
]
# Async cases carry exactly the same slots as the sync ones.
_AsyncGetCase = _SyncGetCase

# Cases for the synchronous managers.
SYNC_GET_CASES: tuple[_SyncGetCase, ...] = (
    (
        "batch-scrape-get",
        SyncBatchScrapeManager,
        GetBatchScrapeJobParams,
        lambda: GetBatchScrapeJobParams(page=1),
        "Failed to serialize batch scrape get params",
    ),
    (
        "crawl-get",
        SyncCrawlManager,
        GetCrawlJobParams,
        lambda: GetCrawlJobParams(page=1),
        "Failed to serialize crawl get params",
    ),
)

# Cases for the asynchronous managers (same ids and messages as above).
ASYNC_GET_CASES: tuple[_AsyncGetCase, ...] = (
    (
        "batch-scrape-get",
        AsyncBatchScrapeManager,
        GetBatchScrapeJobParams,
        lambda: GetBatchScrapeJobParams(page=1),
        "Failed to serialize batch scrape get params",
    ),
    (
        "crawl-get",
        AsyncCrawlManager,
        GetCrawlJobParams,
        lambda: GetCrawlJobParams(page=1),
        "Failed to serialize crawl get params",
    ),
)
224+
178225

179226
@pytest.mark.parametrize(
180227
"_, manager_class, __, build_params, expected_url, expected_payload, ___",
@@ -420,3 +467,179 @@ async def run() -> None:
420467
assert exc_info.value.original_error is None
421468

422469
asyncio.run(run())
470+
471+
472+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    SYNC_GET_CASES,
    ids=[label for label, *_rest in SYNC_GET_CASES],
)
def test_sync_job_get_wraps_param_serialization_errors(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """A ``RuntimeError`` from ``model_dump`` must surface as ``HyperbrowserError``.

    The raised error has to match ``expected_error`` and expose the
    ``RuntimeError`` through ``original_error``.
    """
    job_manager = manager_class(_SyncClient())

    def _broken_model_dump(*_args, **_kwargs):
        # Accept whatever arguments the manager passes and always fail.
        raise RuntimeError("broken model_dump")

    monkeypatch.setattr(params_class, "model_dump", _broken_model_dump)

    with pytest.raises(HyperbrowserError, match=expected_error) as caught:
        job_manager.get("job_123", build_params())

    wrapped = caught.value.original_error
    assert isinstance(wrapped, RuntimeError)
498+
499+
500+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    SYNC_GET_CASES,
    ids=[label for label, *_rest in SYNC_GET_CASES],
)
def test_sync_job_get_preserves_hyperbrowser_serialization_errors(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """A ``HyperbrowserError`` from ``model_dump`` must pass through as raised.

    The message stays the custom one and ``original_error`` stays ``None``.
    ``expected_error`` is accepted only because the shared case tuple
    supplies it; this test does not use it.
    """
    job_manager = manager_class(_SyncClient())

    def _failing_model_dump(*_args, **_kwargs):
        # Accept whatever arguments the manager passes and always fail.
        raise HyperbrowserError("custom model_dump failure")

    monkeypatch.setattr(params_class, "model_dump", _failing_model_dump)

    with pytest.raises(
        HyperbrowserError, match="custom model_dump failure"
    ) as caught:
        job_manager.get("job_123", build_params())

    wrapped = caught.value.original_error
    assert wrapped is None
528+
529+
530+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    SYNC_GET_CASES,
    ids=[label for label, *_rest in SYNC_GET_CASES],
)
def test_sync_job_get_rejects_non_dict_serialized_params(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """A ``model_dump`` result that is not a plain ``dict`` must be rejected.

    ``model_dump`` is patched to return a ``MappingProxyType``; ``get`` is
    expected to raise ``HyperbrowserError`` matching ``expected_error`` with
    ``original_error`` left as ``None``.
    """
    job_manager = manager_class(_SyncClient())

    def _mapping_proxy_dump(*_args, **_kwargs):
        # A read-only mapping view: mapping-like, but not a ``dict``.
        return MappingProxyType({"page": 1})

    monkeypatch.setattr(params_class, "model_dump", _mapping_proxy_dump)

    with pytest.raises(HyperbrowserError, match=expected_error) as caught:
        job_manager.get("job_123", build_params())

    wrapped = caught.value.original_error
    assert wrapped is None
555+
556+
557+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    ASYNC_GET_CASES,
    ids=[label for label, *_rest in ASYNC_GET_CASES],
)
def test_async_job_get_wraps_param_serialization_errors(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """Async variant: a ``RuntimeError`` from ``model_dump`` gets wrapped.

    Awaiting ``get`` must raise ``HyperbrowserError`` matching
    ``expected_error`` with the ``RuntimeError`` exposed via
    ``original_error``.
    """
    job_manager = manager_class(_AsyncClient())

    def _broken_model_dump(*_args, **_kwargs):
        # Accept whatever arguments the manager passes and always fail.
        raise RuntimeError("broken model_dump")

    monkeypatch.setattr(params_class, "model_dump", _broken_model_dump)

    async def _exercise() -> None:
        with pytest.raises(HyperbrowserError, match=expected_error) as caught:
            await job_manager.get("job_123", build_params())
        wrapped = caught.value.original_error
        assert isinstance(wrapped, RuntimeError)

    # Drive the coroutine to completion from this synchronous test.
    asyncio.run(_exercise())
585+
586+
587+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    ASYNC_GET_CASES,
    ids=[label for label, *_rest in ASYNC_GET_CASES],
)
def test_async_job_get_preserves_hyperbrowser_serialization_errors(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """Async variant: a ``HyperbrowserError`` from ``model_dump`` passes through.

    The message stays the custom one and ``original_error`` stays ``None``.
    ``expected_error`` is accepted only because the shared case tuple
    supplies it; this test does not use it.
    """
    job_manager = manager_class(_AsyncClient())

    def _failing_model_dump(*_args, **_kwargs):
        # Accept whatever arguments the manager passes and always fail.
        raise HyperbrowserError("custom model_dump failure")

    monkeypatch.setattr(params_class, "model_dump", _failing_model_dump)

    async def _exercise() -> None:
        with pytest.raises(
            HyperbrowserError, match="custom model_dump failure"
        ) as caught:
            await job_manager.get("job_123", build_params())
        wrapped = caught.value.original_error
        assert wrapped is None

    # Drive the coroutine to completion from this synchronous test.
    asyncio.run(_exercise())
617+
618+
619+
@pytest.mark.parametrize(
    "_, manager_class, params_class, build_params, expected_error",
    ASYNC_GET_CASES,
    ids=[label for label, *_rest in ASYNC_GET_CASES],
)
def test_async_job_get_rejects_non_dict_serialized_params(
    _: str,
    manager_class: Type[Any],
    params_class: Type[Any],
    build_params: Callable[[], Any],
    expected_error: str,
    monkeypatch: pytest.MonkeyPatch,
):
    """Async variant: a non-``dict`` ``model_dump`` result must be rejected.

    ``model_dump`` is patched to return a ``MappingProxyType``; awaiting
    ``get`` is expected to raise ``HyperbrowserError`` matching
    ``expected_error`` with ``original_error`` left as ``None``.
    """
    job_manager = manager_class(_AsyncClient())

    def _mapping_proxy_dump(*_args, **_kwargs):
        # A read-only mapping view: mapping-like, but not a ``dict``.
        return MappingProxyType({"page": 1})

    monkeypatch.setattr(params_class, "model_dump", _mapping_proxy_dump)

    async def _exercise() -> None:
        with pytest.raises(HyperbrowserError, match=expected_error) as caught:
            await job_manager.get("job_123", build_params())
        wrapped = caught.value.original_error
        assert wrapped is None

    # Drive the coroutine to completion from this synchronous test.
    asyncio.run(_exercise())

0 commit comments

Comments
 (0)