Skip to content

Commit ca8cc46

Browse files
committed
Documentation updates: playwright_suggested_filename meta key, Page type hints
1 parent d825d6f commit ca8cc46

File tree

5 files changed

+29
-8
lines changed

5 files changed

+29
-8
lines changed

README.md

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -448,14 +448,16 @@ This key could be used in conjunction with `playwright_include_page` to make a c
448448
requests using the same page. For instance:
449449

450450
```python
451+
from playwright.async_api import Page
452+
451453
def start_requests(self):
452454
yield scrapy.Request(
453455
url="https://httpbin.org/get",
454456
meta={"playwright": True, "playwright_include_page": True},
455457
)
456458

457459
def parse(self, response, **kwargs):
458-
page = response.meta["playwright_page"]
460+
page: Page = response.meta["playwright_page"]
459461
yield scrapy.Request(
460462
url="https://httpbin.org/headers",
461463
callback=self.parse_headers,
@@ -496,6 +498,20 @@ def parse(self, response, **kwargs):
496498
# {'issuer': 'DigiCert TLS RSA SHA256 2020 CA1', 'protocol': 'TLS 1.3', 'subjectName': 'www.example.org', 'validFrom': 1647216000, 'validTo': 1678838399}
497499
```
498500

501+
### `playwright_suggested_filename`
502+
Type `Optional[str]`, read only
503+
504+
The value of the [`Download.suggested_filename`](https://playwright.dev/python/docs/api/class-download#download-suggested-filename)
505+
attribute when the response is the binary contents of a
506+
[download](https://playwright.dev/python/docs/downloads) (e.g. a PDF file).
507+
Only available for responses that resulted solely in a download. Can be accessed
508+
in the callback via `response.meta['playwright_suggested_filename']`.
509+
510+
```python
511+
def parse(self, response, **kwargs):
512+
print(response.meta["playwright_suggested_filename"])
513+
# 'sample_file.pdf'
514+
```
499515

500516
## Receiving Page objects in callbacks
501517

@@ -514,6 +530,7 @@ necessary the spider job could get stuck because of the limit set by the
514530
`PLAYWRIGHT_MAX_PAGES_PER_CONTEXT` setting.
515531

516532
```python
533+
from playwright.async_api import Page
517534
import scrapy
518535

519536
class AwesomeSpiderWithPage(scrapy.Spider):
@@ -528,7 +545,7 @@ class AwesomeSpiderWithPage(scrapy.Spider):
528545
)
529546

530547
def parse_first(self, response):
531-
page = response.meta["playwright_page"]
548+
page: Page = response.meta["playwright_page"]
532549
return scrapy.Request(
533550
url="https://example.com",
534551
callback=self.parse_second,
@@ -537,13 +554,13 @@ class AwesomeSpiderWithPage(scrapy.Spider):
537554
)
538555

539556
async def parse_second(self, response):
540-
page = response.meta["playwright_page"]
557+
page: Page = response.meta["playwright_page"]
541558
title = await page.title() # "Example Domain"
542559
await page.close()
543560
return {"title": title}
544561

545562
async def errback_close_page(self, failure):
546-
page = failure.request.meta["playwright_page"]
563+
page: Page = failure.request.meta["playwright_page"]
547564
await page.close()
548565
```
549566

examples/books.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from pathlib import Path
44
from typing import Generator, Optional
55

6+
from playwright.async_api import Page
67
from scrapy import Spider
78
from scrapy.http.response import Response
89

@@ -51,7 +52,7 @@ def parse(self, response: Response, current_page: Optional[int] = None) -> Gener
5152

5253
async def parse_book(self, response: Response) -> dict:
5354
url_sha256 = hashlib.sha256(response.url.encode("utf-8")).hexdigest()
54-
page = response.meta["playwright_page"]
55+
page: Page = response.meta["playwright_page"]
5556
await page.screenshot(
5657
path=Path(__file__).parent / "books" / f"{url_sha256}.png", full_page=True
5758
)

examples/contexts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from pathlib import Path
22

3+
from playwright.async_api import Page
34
from scrapy import Spider, Request
45

56

@@ -96,7 +97,7 @@ def start_requests(self):
9697
)
9798

9899
async def parse(self, response, **kwargs):
99-
page = response.meta["playwright_page"]
100+
page: Page = response.meta["playwright_page"]
100101
context_name = response.meta["playwright_context"]
101102
storage_state = await page.context.storage_state()
102103
await page.close()

examples/max_pages.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from playwright.async_api import Page
12
from scrapy import Spider, Request
23

34

@@ -45,5 +46,5 @@ def parse(self, response, **kwargs):
4546
return {"url": response.url}
4647

4748
async def errback(self, failure):
48-
page = failure.request.meta["playwright_page"]
49+
page: Page = failure.request.meta["playwright_page"]
4950
await page.close()

examples/storage.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from playwright.async_api import Page
12
from scrapy import Spider, Request
23
from scrapy_playwright.page import PageMethod
34

@@ -27,7 +28,7 @@ def start_requests(self):
2728
)
2829

2930
async def parse(self, response, **kwargs):
30-
page = response.meta["playwright_page"]
31+
page: Page = response.meta["playwright_page"]
3132
storage_state = await page.context.storage_state()
3233
await page.close()
3334
return {

0 commit comments

Comments
 (0)