Skip to content

Commit 5500a6e

Browse files
jdemaeyer and elacuesta authored
Allow custom PageMethod callbacks (#318)
* Allow custom PageMethod callbacks
* Add test for callable page methods
* Adjust typing for PageMethod
* Remove trailing commas (thank you pylint)
* Update docstring
* Update docs, tests & types
* Remove unused import

---------

Co-authored-by: Eugenio Lacuesta <eugenio.lacuesta@gmail.com>
1 parent a28631a commit 5500a6e

File tree

4 files changed

+70
-6
lines changed

4 files changed

+70
-6
lines changed

README.md

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -844,10 +844,12 @@ down or clicking links) and you want to handle only the final result in your cal
844844

845845
### `PageMethod` class
846846

847-
#### `scrapy_playwright.page.PageMethod(method: str, *args, **kwargs)`:
847+
#### `scrapy_playwright.page.PageMethod(method: str | callable, *args, **kwargs)`:
848848

849849
Represents a method to be called (and awaited if necessary) on a
850850
`playwright.page.Page` object (e.g. "click", "screenshot", "evaluate", etc).
851+
It's also possible to pass callable objects that will be invoked as callbacks
852+
and receive the Playwright page as their first argument.
851853
`method` is the name of the method (or a callable), `*args` and `**kwargs`
852854
are passed when calling such method. The return value
853855
will be stored in the `PageMethod.result` attribute.
@@ -885,8 +887,34 @@ async def parse(self, response, **kwargs):
885887
await page.close()
886888
```
887889

890+
### Passing callable objects
888891

889-
### Supported methods
892+
If a `PageMethod` receives a callable object as its first argument, it will be
893+
called with the page as its first argument. Any additional arguments are passed
894+
to the callable after the page.
895+
896+
```python
897+
async def scroll_page(page: Page) -> str:
898+
await page.wait_for_selector(selector="div.quote")
899+
await page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
900+
await page.wait_for_selector(selector="div.quote:nth-child(11)")
901+
return page.url
902+
903+
904+
class MySpyder(scrapy.Spider):
905+
name = "scroll"
906+
907+
def start_requests(self):
908+
yield Request(
909+
url="https://quotes.toscrape.com/scroll",
910+
meta={
911+
"playwright": True,
912+
"playwright_page_methods": [PageMethod(scroll_page)],
913+
},
914+
)
915+
```
916+
917+
### Supported Playwright methods
890918

891919
Refer to the [upstream docs for the `Page` class](https://playwright.dev/python/docs/api/class-page)
892920
to see available methods.

scrapy_playwright/handler.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import warnings
66
from contextlib import suppress
77
from dataclasses import dataclass, field as dataclass_field
8+
from functools import partial
89
from ipaddress import ip_address
910
from time import time
1011
from typing import Awaitable, Callable, Dict, Optional, Tuple, Type, TypeVar, Union
@@ -607,7 +608,10 @@ async def _apply_page_methods(self, page: Page, request: Request, spider: Spider
607608
for pm in page_methods:
608609
if isinstance(pm, PageMethod):
609610
try:
610-
method = getattr(page, pm.method)
611+
if callable(pm.method):
612+
method = partial(pm.method, page)
613+
else:
614+
method = getattr(page, pm.method)
611615
except AttributeError as ex:
612616
logger.warning(
613617
"Ignoring %r: could not find method",

scrapy_playwright/page.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any
1+
from typing import Any, Callable, Union
22

33

44
__all__ = ["PageMethod"]
@@ -8,10 +8,13 @@ class PageMethod:
88
"""
99
Represents a method to be called (and awaited if necessary) on a
1010
Playwright page, such as "click", "screenshot", "evaluate", etc.
11+
12+
If a callable is received, it will be called with the page as its first argument.
13+
Any additional arguments are passed to the callable after the page.
1114
"""
1215

13-
def __init__(self, method: str, *args, **kwargs) -> None:
14-
self.method: str = method
16+
def __init__(self, method: Union[str, Callable], *args, **kwargs) -> None:
17+
self.method: Union[str, Callable] = method
1518
self.args: tuple = args
1619
self.kwargs: dict = kwargs
1720
self.result: Any = None

tests/tests_asyncio/test_page_methods.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from scrapy import Spider, Request
99
from scrapy.http.response.html import HtmlResponse
1010

11+
from playwright.async_api import Page
1112
from scrapy_playwright.page import PageMethod
1213

1314
from tests import allow_windows, make_handler, assert_correct_response
@@ -186,6 +187,34 @@ async def test_page_method_pdf(self):
186187
if platform.system() != "Windows":
187188
assert get_mimetype(pdf_file) == "application/pdf"
188189

190+
@allow_windows
191+
async def test_page_method_callable(self):
192+
193+
async def scroll_page(page: Page) -> str:
194+
await page.wait_for_selector(selector="div.quote")
195+
await page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
196+
await page.wait_for_selector(selector="div.quote:nth-child(11)")
197+
await page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
198+
await page.wait_for_selector(selector="div.quote:nth-child(21)")
199+
return page.url
200+
201+
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
202+
with StaticMockServer() as server:
203+
req = Request(
204+
url=server.urljoin("/scroll.html"),
205+
meta={
206+
"playwright": True,
207+
"playwright_page_methods": {
208+
"callable": PageMethod(scroll_page),
209+
},
210+
},
211+
)
212+
resp = await handler._download_request(req, Spider("foo"))
213+
214+
assert_correct_response(resp, req)
215+
assert len(resp.css("div.quote")) == 30
216+
assert resp.meta["playwright_page_methods"]["callable"].result == resp.url
217+
189218

190219
class TestPageMethodChromium(IsolatedAsyncioTestCase, MixinPageMethodTestCase):
191220
browser_type = "chromium"

0 commit comments

Comments
 (0)