@@ -98,7 +98,7 @@ class AwesomeSpider(scrapy.Spider):
9898 meta = {" playwright" : True },
9999 )
100100
101- def parse (self , response ):
101+ def parse (self , response , ** kwargs ):
102102 # 'response' contains the page as seen by the browser
103103 return {" url" : response.url}
104104```
@@ -138,6 +138,37 @@ PLAYWRIGHT_LAUNCH_OPTIONS = {
138138}
139139```
140140
141+ ### ` PLAYWRIGHT_CDP_URL `
142+ Type ` Optional[str] ` , default ` None `
143+
144+ The endpoint of a remote Chromium browser to connect to using the
145+ [ Chrome DevTools Protocol] ( https://chromedevtools.github.io/devtools-protocol/ ) ,
146+ via [ ` BrowserType.connect_over_cdp ` ] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect-over-cdp ) .
147+ If this setting is used:
148+ * all non-persistent contexts will be created on the connected remote browser
149+ * the ` PLAYWRIGHT_LAUNCH_OPTIONS ` setting is ignored
150+ * the ` PLAYWRIGHT_BROWSER_TYPE ` setting must not be set to a value other than "chromium"
151+
152+ ``` python
153+ PLAYWRIGHT_CDP_URL = " http://localhost:9222"
154+ ```
155+
156+ ### ` PLAYWRIGHT_CDP_KWARGS `
157+ Type ` dict[str, Any] ` , default ` {} `
158+
159+ Additional keyword arguments to be passed to
160+ [ ` BrowserType.connect_over_cdp ` ] ( https://playwright.dev/python/docs/api/class-browsertype#browser-type-connect-over-cdp )
161+ when using ` PLAYWRIGHT_CDP_URL ` . The ` endpoint_url ` key is always ignored;
162+ ` PLAYWRIGHT_CDP_URL ` is used instead.
163+
164+ ``` python
165+ PLAYWRIGHT_CDP_KWARGS = {
166+ " slow_mo" : 1000 ,
167+ " timeout" : 10 * 1000
168+ }
169+ ```
170+
171+
141172### ` PLAYWRIGHT_CONTEXTS `
142173Type ` dict[str, dict] ` , default ` {} `
143174
@@ -412,7 +443,7 @@ def start_requests(self):
412443 meta = {" playwright" : True , " playwright_include_page" : True },
413444 )
414445
415- def parse (self , response ):
446+ def parse (self , response , ** kwargs ):
416447 page = response.meta[" playwright_page" ]
417448 yield scrapy.Request(
418449 url = " https://httpbin.org/headers" ,
@@ -449,7 +480,7 @@ about the given response. Only available for HTTPS requests. Can be accessed
449480in the callback via ` response.meta['playwright_security_details'] `
450481
451482``` python
452- def parse (self , response ):
483+ def parse (self , response , ** kwargs ):
453484 print (response.meta[" playwright_security_details" ])
454485 # {'issuer': 'DigiCert TLS RSA SHA256 2020 CA1', 'protocol': 'TLS 1.3', 'subjectName': 'www.example.org', 'validFrom': 1647216000, 'validTo': 1678838399}
455486```
@@ -597,7 +628,7 @@ you can access a context through the corresponding [`Page.context`](https://playw
597628attribute, and await [ ` close ` ] ( https://playwright.dev/python/docs/api/class-browsercontext#browser-context-close ) on it.
598629
599630``` python
600- def parse (self , response ):
631+ def parse (self , response , ** kwargs ):
601632 yield scrapy.Request(
602633 url = " https://example.org" ,
603634 callback = self .parse_in_new_context,
@@ -660,7 +691,7 @@ class ProxySpider(Spider):
660691 def start_requests (self ):
661692 yield Request(" http://httpbin.org/get" , meta = {" playwright" : True })
662693
663- def parse (self , response ):
694+ def parse (self , response , ** kwargs ):
664695 print (response.text)
665696```
666697
@@ -729,7 +760,7 @@ def start_requests(self):
729760 },
730761 )
731762
732- def parse (self , response ):
763+ def parse (self , response , ** kwargs ):
733764 screenshot = response.meta[" playwright_page_methods" ][0 ]
734765 # screenshot.result contains the image's bytes
735766```
@@ -742,7 +773,7 @@ def start_requests(self):
742773 meta = {" playwright" : True , " playwright_include_page" : True },
743774 )
744775
745- async def parse (self , response ):
776+ async def parse (self , response , ** kwargs ):
746777 page = response.meta[" playwright_page" ]
747778 screenshot = await page.screenshot(path = " example.png" , full_page = True )
748779 # screenshot contains the image's bytes
@@ -834,7 +865,7 @@ class ClickAndSavePdfSpider(scrapy.Spider):
834865 ),
835866 )
836867
837- def parse (self , response ):
868+ def parse (self , response , ** kwargs ):
838869 pdf_bytes = response.meta[" playwright_page_methods" ][" pdf" ].result
839870 with open (" iana.pdf" , " wb" ) as fp:
840871 fp.write(pdf_bytes)
@@ -861,7 +892,7 @@ class ScrollSpider(scrapy.Spider):
861892 ),
862893 )
863894
864- async def parse (self , response ):
895+ async def parse (self , response , ** kwargs ):
865896 page = response.meta[" playwright_page" ]
866897 await page.screenshot(path = " quotes.png" , full_page = True )
867898 await page.close()
0 commit comments