Skip to content

Commit ddf37ac

Browse files
Reuse pagination merge callbacks across job managers
Co-authored-by: Shri Sukhani <shrisukhani@users.noreply.github.com>
1 parent c00a15f commit ddf37ac

File tree

14 files changed

+125
-61
lines changed

14 files changed

+125
-61
lines changed

hyperbrowser/client/managers/async_manager/crawl.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
retry_operation_async,
1111
)
1212
from ..job_pagination_utils import (
13+
build_job_paginated_page_merge_callback,
1314
initialize_job_paginated_response,
14-
merge_job_paginated_page_response,
1515
)
1616
from ..serialization_utils import (
1717
serialize_model_dump_or_default,
@@ -113,13 +113,6 @@ async def start_and_wait(
113113
total_counter_alias="totalCrawledPages",
114114
)
115115

116-
def merge_page_response(page_response: CrawlJobResponse) -> None:
117-
merge_job_paginated_page_response(
118-
job_response,
119-
page_response,
120-
total_counter_attr="total_crawled_pages",
121-
)
122-
123116
await collect_paginated_results_async(
124117
operation_name=operation_name,
125118
get_next_page=lambda page: self.get(
@@ -132,7 +125,10 @@ def merge_page_response(page_response: CrawlJobResponse) -> None:
132125
get_total_page_batches=lambda page_response: (
133126
page_response.total_page_batches
134127
),
135-
on_page_success=merge_page_response,
128+
on_page_success=build_job_paginated_page_merge_callback(
129+
job_response=job_response,
130+
total_counter_attr="total_crawled_pages",
131+
),
136132
max_wait_seconds=max_wait_seconds,
137133
max_attempts=POLLING_ATTEMPTS,
138134
retry_delay_seconds=0.5,

hyperbrowser/client/managers/async_manager/scrape.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
wait_for_job_result_async,
1212
)
1313
from ..job_pagination_utils import (
14+
build_job_paginated_page_merge_callback,
1415
initialize_job_paginated_response,
15-
merge_job_paginated_page_response,
1616
)
1717
from ..serialization_utils import (
1818
serialize_model_dump_or_default,
@@ -120,13 +120,6 @@ async def start_and_wait(
120120
total_counter_alias="totalScrapedPages",
121121
)
122122

123-
def merge_page_response(page_response: BatchScrapeJobResponse) -> None:
124-
merge_job_paginated_page_response(
125-
job_response,
126-
page_response,
127-
total_counter_attr="total_scraped_pages",
128-
)
129-
130123
await collect_paginated_results_async(
131124
operation_name=operation_name,
132125
get_next_page=lambda page: self.get(
@@ -139,7 +132,10 @@ def merge_page_response(page_response: BatchScrapeJobResponse) -> None:
139132
get_total_page_batches=lambda page_response: (
140133
page_response.total_page_batches
141134
),
142-
on_page_success=merge_page_response,
135+
on_page_success=build_job_paginated_page_merge_callback(
136+
job_response=job_response,
137+
total_counter_attr="total_scraped_pages",
138+
),
143139
max_wait_seconds=max_wait_seconds,
144140
max_attempts=POLLING_ATTEMPTS,
145141
retry_delay_seconds=0.5,

hyperbrowser/client/managers/async_manager/web/batch_fetch.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
from ...web_payload_utils import build_batch_fetch_start_payload
1212
from ...web_payload_utils import build_batch_fetch_get_params
1313
from ...web_pagination_utils import (
14+
build_paginated_page_merge_callback,
1415
initialize_paginated_job_response,
15-
merge_paginated_page_response,
1616
)
1717
from ....polling import (
1818
build_fetch_operation_name,
@@ -106,9 +106,6 @@ async def start_and_wait(
106106
status=job_status,
107107
)
108108

109-
def merge_page_response(page_response: BatchFetchJobResponse) -> None:
110-
merge_paginated_page_response(job_response, page_response)
111-
112109
await collect_paginated_results_async(
113110
operation_name=operation_name,
114111
get_next_page=lambda page: self.get(
@@ -121,7 +118,9 @@ def merge_page_response(page_response: BatchFetchJobResponse) -> None:
121118
get_total_page_batches=lambda page_response: (
122119
page_response.total_page_batches
123120
),
124-
on_page_success=merge_page_response,
121+
on_page_success=build_paginated_page_merge_callback(
122+
job_response=job_response,
123+
),
125124
max_wait_seconds=max_wait_seconds,
126125
max_attempts=POLLING_ATTEMPTS,
127126
retry_delay_seconds=0.5,

hyperbrowser/client/managers/async_manager/web/crawl.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
from ...web_payload_utils import build_web_crawl_start_payload
1212
from ...web_payload_utils import build_web_crawl_get_params
1313
from ...web_pagination_utils import (
14+
build_paginated_page_merge_callback,
1415
initialize_paginated_job_response,
15-
merge_paginated_page_response,
1616
)
1717
from ....polling import (
1818
build_fetch_operation_name,
@@ -104,9 +104,6 @@ async def start_and_wait(
104104
status=job_status,
105105
)
106106

107-
def merge_page_response(page_response: WebCrawlJobResponse) -> None:
108-
merge_paginated_page_response(job_response, page_response)
109-
110107
await collect_paginated_results_async(
111108
operation_name=operation_name,
112109
get_next_page=lambda page: self.get(
@@ -119,7 +116,9 @@ def merge_page_response(page_response: WebCrawlJobResponse) -> None:
119116
get_total_page_batches=lambda page_response: (
120117
page_response.total_page_batches
121118
),
122-
on_page_success=merge_page_response,
119+
on_page_success=build_paginated_page_merge_callback(
120+
job_response=job_response,
121+
),
123122
max_wait_seconds=max_wait_seconds,
124123
max_attempts=POLLING_ATTEMPTS,
125124
retry_delay_seconds=0.5,

hyperbrowser/client/managers/job_pagination_utils.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Type, TypeVar
1+
from typing import Any, Callable, Type, TypeVar
22

33
T = TypeVar("T")
44

@@ -39,3 +39,18 @@ def merge_job_paginated_page_response(
3939
job_response.total_page_batches = page_response.total_page_batches
4040
job_response.batch_size = page_response.batch_size
4141
job_response.error = page_response.error
42+
43+
44+
def build_job_paginated_page_merge_callback(
45+
*,
46+
job_response: Any,
47+
total_counter_attr: str,
48+
) -> Callable[[Any], None]:
49+
def _merge_callback(page_response: Any) -> None:
50+
merge_job_paginated_page_response(
51+
job_response,
52+
page_response,
53+
total_counter_attr=total_counter_attr,
54+
)
55+
56+
return _merge_callback

hyperbrowser/client/managers/sync_manager/crawl.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
retry_operation,
1111
)
1212
from ..job_pagination_utils import (
13+
build_job_paginated_page_merge_callback,
1314
initialize_job_paginated_response,
14-
merge_job_paginated_page_response,
1515
)
1616
from ..serialization_utils import (
1717
serialize_model_dump_or_default,
@@ -113,13 +113,6 @@ def start_and_wait(
113113
total_counter_alias="totalCrawledPages",
114114
)
115115

116-
def merge_page_response(page_response: CrawlJobResponse) -> None:
117-
merge_job_paginated_page_response(
118-
job_response,
119-
page_response,
120-
total_counter_attr="total_crawled_pages",
121-
)
122-
123116
collect_paginated_results(
124117
operation_name=operation_name,
125118
get_next_page=lambda page: self.get(
@@ -132,7 +125,10 @@ def merge_page_response(page_response: CrawlJobResponse) -> None:
132125
get_total_page_batches=lambda page_response: (
133126
page_response.total_page_batches
134127
),
135-
on_page_success=merge_page_response,
128+
on_page_success=build_job_paginated_page_merge_callback(
129+
job_response=job_response,
130+
total_counter_attr="total_crawled_pages",
131+
),
136132
max_wait_seconds=max_wait_seconds,
137133
max_attempts=POLLING_ATTEMPTS,
138134
retry_delay_seconds=0.5,

hyperbrowser/client/managers/sync_manager/scrape.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
wait_for_job_result,
1212
)
1313
from ..job_pagination_utils import (
14+
build_job_paginated_page_merge_callback,
1415
initialize_job_paginated_response,
15-
merge_job_paginated_page_response,
1616
)
1717
from ..serialization_utils import (
1818
serialize_model_dump_or_default,
@@ -118,13 +118,6 @@ def start_and_wait(
118118
total_counter_alias="totalScrapedPages",
119119
)
120120

121-
def merge_page_response(page_response: BatchScrapeJobResponse) -> None:
122-
merge_job_paginated_page_response(
123-
job_response,
124-
page_response,
125-
total_counter_attr="total_scraped_pages",
126-
)
127-
128121
collect_paginated_results(
129122
operation_name=operation_name,
130123
get_next_page=lambda page: self.get(
@@ -137,7 +130,10 @@ def merge_page_response(page_response: BatchScrapeJobResponse) -> None:
137130
get_total_page_batches=lambda page_response: (
138131
page_response.total_page_batches
139132
),
140-
on_page_success=merge_page_response,
133+
on_page_success=build_job_paginated_page_merge_callback(
134+
job_response=job_response,
135+
total_counter_attr="total_scraped_pages",
136+
),
141137
max_wait_seconds=max_wait_seconds,
142138
max_attempts=POLLING_ATTEMPTS,
143139
retry_delay_seconds=0.5,

hyperbrowser/client/managers/sync_manager/web/batch_fetch.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
from ...web_payload_utils import build_batch_fetch_start_payload
1212
from ...web_payload_utils import build_batch_fetch_get_params
1313
from ...web_pagination_utils import (
14+
build_paginated_page_merge_callback,
1415
initialize_paginated_job_response,
15-
merge_paginated_page_response,
1616
)
1717
from ....polling import (
1818
build_fetch_operation_name,
@@ -104,9 +104,6 @@ def start_and_wait(
104104
status=job_status,
105105
)
106106

107-
def merge_page_response(page_response: BatchFetchJobResponse) -> None:
108-
merge_paginated_page_response(job_response, page_response)
109-
110107
collect_paginated_results(
111108
operation_name=operation_name,
112109
get_next_page=lambda page: self.get(
@@ -119,7 +116,9 @@ def merge_page_response(page_response: BatchFetchJobResponse) -> None:
119116
get_total_page_batches=lambda page_response: (
120117
page_response.total_page_batches
121118
),
122-
on_page_success=merge_page_response,
119+
on_page_success=build_paginated_page_merge_callback(
120+
job_response=job_response,
121+
),
123122
max_wait_seconds=max_wait_seconds,
124123
max_attempts=POLLING_ATTEMPTS,
125124
retry_delay_seconds=0.5,

hyperbrowser/client/managers/sync_manager/web/crawl.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
from ...web_payload_utils import build_web_crawl_start_payload
1212
from ...web_payload_utils import build_web_crawl_get_params
1313
from ...web_pagination_utils import (
14+
build_paginated_page_merge_callback,
1415
initialize_paginated_job_response,
15-
merge_paginated_page_response,
1616
)
1717
from ....polling import (
1818
build_fetch_operation_name,
@@ -104,9 +104,6 @@ def start_and_wait(
104104
status=job_status,
105105
)
106106

107-
def merge_page_response(page_response: WebCrawlJobResponse) -> None:
108-
merge_paginated_page_response(job_response, page_response)
109-
110107
collect_paginated_results(
111108
operation_name=operation_name,
112109
get_next_page=lambda page: self.get(
@@ -119,7 +116,9 @@ def merge_page_response(page_response: WebCrawlJobResponse) -> None:
119116
get_total_page_batches=lambda page_response: (
120117
page_response.total_page_batches
121118
),
122-
on_page_success=merge_page_response,
119+
on_page_success=build_paginated_page_merge_callback(
120+
job_response=job_response,
121+
),
123122
max_wait_seconds=max_wait_seconds,
124123
max_attempts=POLLING_ATTEMPTS,
125124
retry_delay_seconds=0.5,

hyperbrowser/client/managers/web_pagination_utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Type, TypeVar
1+
from typing import Any, Callable, Type, TypeVar
22

33
T = TypeVar("T")
44

@@ -29,3 +29,10 @@ def merge_paginated_page_response(job_response: Any, page_response: Any) -> None
2929
job_response.total_page_batches = page_response.total_page_batches
3030
job_response.batch_size = page_response.batch_size
3131
job_response.error = page_response.error
32+
33+
34+
def build_paginated_page_merge_callback(*, job_response: Any) -> Callable[[Any], None]:
    """Build an ``on_page_success`` callback bound to *job_response*.

    Each invocation of the returned closure merges one page of results into
    the captured ``job_response`` via ``merge_paginated_page_response``.
    """

    def _on_page_success(page_response: Any) -> None:
        merge_paginated_page_response(job_response, page_response)

    return _on_page_success

0 commit comments

Comments (0)