3232from scrapy_playwright .page import PageMethod
3333from scrapy_playwright ._utils import (
3434 _encode_body ,
35+ _get_header_value ,
3536 _get_page_content ,
3637 _is_safe_close_error ,
3738 _maybe_await ,
@@ -239,27 +240,6 @@ async def _create_page(self, request: Request, spider: Spider) -> Page:
239240 self ._set_max_concurrent_page_count ()
240241 if self .default_navigation_timeout is not None :
241242 page .set_default_navigation_timeout (self .default_navigation_timeout )
242- page_init_callback = request .meta .get ("playwright_page_init_callback" )
243- if page_init_callback :
244- try :
245- page_init_callback = load_object (page_init_callback )
246- await page_init_callback (page , request )
247- except Exception as ex :
248- logger .warning (
249- "[Context=%s] Page init callback exception for %s exc_type=%s exc_msg=%s" ,
250- context_name ,
251- repr (request ),
252- type (ex ),
253- str (ex ),
254- extra = {
255- "spider" : spider ,
256- "context_name" : context_name ,
257- "scrapy_request_url" : request .url ,
258- "scrapy_request_method" : request .method ,
259- "exception" : ex ,
260- },
261- exc_info = True ,
262- )
263243
264244 page .on ("close" , self ._make_close_page_callback (context_name ))
265245 page .on ("crash" , self ._make_close_page_callback (context_name ))
@@ -399,10 +379,6 @@ async def _download_request_with_page(
399379 )
400380 request .meta ["download_latency" ] = time () - start_time
401381
402- if not request .meta .get ("playwright_include_page" ):
403- await page .close ()
404- self .stats .inc_value ("playwright/page_count/closed" )
405-
406382 server_ip_address = None
407383 with suppress (AttributeError , KeyError , TypeError , ValueError ):
408384 server_addr = await response .server_addr ()
@@ -411,6 +387,10 @@ async def _download_request_with_page(
411387 with suppress (AttributeError ):
412388 request .meta ["playwright_security_details" ] = await response .security_details ()
413389
390+ if not request .meta .get ("playwright_include_page" ):
391+ await page .close ()
392+ self .stats .inc_value ("playwright/page_count/closed" )
393+
414394 body , encoding = _encode_body (headers = headers , text = body_str )
415395 respcls = responsetypes .from_args (headers = headers , url = page .url , body = body )
416396 return respcls (
@@ -683,19 +663,22 @@ async def _maybe_execute_page_init_callback(
683663
684664def _make_request_logger (context_name : str , spider : Spider ) -> Callable :
685665 async def _log_request (request : PlaywrightRequest ) -> None :
686- referrer = await request .header_value ("referer" )
666+ log_args = [context_name , request .method .upper (), request .url , request .resource_type ]
667+ referrer = await _get_header_value (request , "referer" )
668+ if referrer :
669+ log_args .append (referrer )
670+ log_msg = "[Context=%s] Request: <%s %s> (resource type: %s, referrer: %s)"
671+ else :
672+ log_msg = "[Context=%s] Request: <%s %s> (resource type: %s)"
687673 logger .debug (
688- "[Context=%s] Request: <%s %s> (resource type: %s, referrer: %s)" ,
689- context_name ,
690- request .method .upper (),
691- request .url ,
692- request .resource_type ,
693- referrer ,
674+ log_msg ,
675+ * log_args ,
694676 extra = {
695677 "spider" : spider ,
696678 "context_name" : context_name ,
697679 "playwright_request_url" : request .url ,
698680 "playwright_request_method" : request .method ,
681+ "playwright_resource_type" : request .resource_type ,
699682 },
700683 )
701684
@@ -704,16 +687,15 @@ async def _log_request(request: PlaywrightRequest) -> None:
704687
705688def _make_response_logger (context_name : str , spider : Spider ) -> Callable :
706689 async def _log_response (response : PlaywrightResponse ) -> None :
707- referrer = await response .header_value ("referer" )
708- log_args = [context_name , response .status , response .url , referrer ]
709- if 300 <= response .status < 400 :
710- location = await response .header_value ("location" )
690+ log_args = [context_name , response .status , response .url ]
691+ location = await _get_header_value (response , "location" )
692+ if location :
711693 log_args .append (location )
712- msg = "[Context=%s] Response: <%i %s> (referrer: %s, location: %s)"
694+ log_msg = "[Context=%s] Response: <%i %s> (location: %s)"
713695 else :
714- msg = "[Context=%s] Response: <%i %s> (referrer: %s) "
696+ log_msg = "[Context=%s] Response: <%i %s>"
715697 logger .debug (
716- msg ,
698+ log_msg ,
717699 * log_args ,
718700 extra = {
719701 "spider" : spider ,
0 commit comments