From 3d2274bb2cbaea6894579dff7589387cc2a23864 Mon Sep 17 00:00:00 2001
From: Vlada Dusek <v.dusek96@gmail.com>
Date: Sat, 28 Feb 2026 09:32:37 +0100
Subject: [PATCH] feat: Update Scrapy template and wrapper to use SDK 3.3.0

Use Scrapy's native AsyncCrawlerRunner (requires Scrapy >= 2.14.0) instead
of CrawlerRunner + deferred_to_future. Let run_scrapy_actor() handle reactor
installation internally, removing manual install_reactor() boilerplate from
__main__.py. Enable Scrapy's HTTP cache (2-hour expiration) in the
template's settings.py.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 templates/python-scrapy/requirements.txt               |  4 ++--
 templates/python-scrapy/src/__main__.py                |  7 -------
 templates/python-scrapy/src/main.py                    | 10 ++++------
 templates/python-scrapy/src/settings.py                |  2 ++
 wrappers/python-scrapy/requirements_apify.txt          |  4 ++--
 .../python-scrapy/{projectFolder}/__main__.template.py |  7 -------
 .../python-scrapy/{projectFolder}/main.template.py     | 10 ++++------
 7 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/templates/python-scrapy/requirements.txt b/templates/python-scrapy/requirements.txt
index d0e8c0f69..3113b42ed 100644
--- a/templates/python-scrapy/requirements.txt
+++ b/templates/python-scrapy/requirements.txt
@@ -1,5 +1,5 @@
 # Feel free to add your Python dependencies below. For formatting guidelines, see:
 # https://pip.pypa.io/en/latest/reference/requirements-file-format/
 
-apify[scrapy] < 4.0.0
-scrapy < 3.0.0
+apify[scrapy] >= 3.3.0, < 4.0.0
+scrapy >= 2.14.0, < 3.0.0
diff --git a/templates/python-scrapy/src/__main__.py b/templates/python-scrapy/src/__main__.py
index 25a28e8ae..a4ce7c281 100644
--- a/templates/python-scrapy/src/__main__.py
+++ b/templates/python-scrapy/src/__main__.py
@@ -10,15 +10,8 @@
 We recommend you do not modify this file unless you really know what you are doing.
 """
 
-# ruff: noqa: E402
 from __future__ import annotations
 
-from scrapy.utils.reactor import install_reactor
-
-# Install Twisted's asyncio reactor before importing any other Twisted or
-# Scrapy components.
-install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
-
 import os
 
 from apify.scrapy import initialize_logging, run_scrapy_actor
diff --git a/templates/python-scrapy/src/main.py b/templates/python-scrapy/src/main.py
index 090e7dcd0..8efb5496a 100644
--- a/templates/python-scrapy/src/main.py
+++ b/templates/python-scrapy/src/main.py
@@ -23,8 +23,7 @@
 
 from apify import Actor
 from apify.scrapy import apply_apify_settings
-from scrapy.crawler import CrawlerRunner
-from scrapy.utils.defer import deferred_to_future
+from scrapy.crawler import AsyncCrawlerRunner
 
 # Import your Scrapy spider here.
 from .spiders import TitleSpider as Spider
@@ -42,11 +41,10 @@ async def main() -> None:
         # Apply Apify settings, which will override the Scrapy project settings.
         settings = apply_apify_settings(proxy_config=proxy_config)
 
-        # Create CrawlerRunner and execute the Scrapy spider.
-        crawler_runner = CrawlerRunner(settings)
-        crawl_deferred = crawler_runner.crawl(
+        # Create AsyncCrawlerRunner and execute the Scrapy spider.
+        crawler_runner = AsyncCrawlerRunner(settings)
+        await crawler_runner.crawl(
             Spider,
             start_urls=start_urls,
             allowed_domains=allowed_domains,
         )
-        await deferred_to_future(crawl_deferred)
diff --git a/templates/python-scrapy/src/settings.py b/templates/python-scrapy/src/settings.py
index de2710401..e516d1c8a 100644
--- a/templates/python-scrapy/src/settings.py
+++ b/templates/python-scrapy/src/settings.py
@@ -15,6 +15,8 @@
 TELNETCONSOLE_ENABLED = False
 # Do not change the Twisted reactor unless you really know what you are doing.
 TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
+HTTPCACHE_ENABLED = True
+HTTPCACHE_EXPIRATION_SECS = 7200
 ITEM_PIPELINES = {
     'src.pipelines.TitleItemPipeline': 123,
 }
diff --git a/wrappers/python-scrapy/requirements_apify.txt b/wrappers/python-scrapy/requirements_apify.txt
index 0a2f0f629..71177265e 100644
--- a/wrappers/python-scrapy/requirements_apify.txt
+++ b/wrappers/python-scrapy/requirements_apify.txt
@@ -1,5 +1,5 @@
 # Add your dependencies here.
 # See https://pip.pypa.io/en/latest/reference/requirements-file-format/
 # for how to format them
-apify[scrapy] < 3.0
-scrapy < 3.0
+apify[scrapy] >= 3.3.0, < 4.0.0
+scrapy >= 2.14.0, < 3.0.0
diff --git a/wrappers/python-scrapy/{projectFolder}/__main__.template.py b/wrappers/python-scrapy/{projectFolder}/__main__.template.py
index 7069990c6..a4ce7c281 100644
--- a/wrappers/python-scrapy/{projectFolder}/__main__.template.py
+++ b/wrappers/python-scrapy/{projectFolder}/__main__.template.py
@@ -9,16 +9,9 @@
 
 We recommend you do not modify this file unless you really know what you are doing.
 """
-# ruff: noqa: E402
 
 from __future__ import annotations
 
-from scrapy.utils.reactor import install_reactor
-
-# Install Twisted's asyncio reactor before importing any other Twisted or
-# Scrapy components.
-install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
-
 import os
 
 from apify.scrapy import initialize_logging, run_scrapy_actor
diff --git a/wrappers/python-scrapy/{projectFolder}/main.template.py b/wrappers/python-scrapy/{projectFolder}/main.template.py
index c78990897..e5d26f29e 100644
--- a/wrappers/python-scrapy/{projectFolder}/main.template.py
+++ b/wrappers/python-scrapy/{projectFolder}/main.template.py
@@ -22,8 +22,7 @@
 
 from apify import Actor
 from apify.scrapy import apply_apify_settings
-from scrapy.crawler import CrawlerRunner
-from scrapy.utils.defer import deferred_to_future
+from scrapy.crawler import AsyncCrawlerRunner
 
 # Import your Scrapy spider here.
 from {{spider_module_name}} import {{spider_class_name}} as Spider
@@ -44,11 +43,10 @@ async def main() -> None:
         # Apply Apify settings, which will override the Scrapy project settings.
         settings = apply_apify_settings(proxy_config=proxy_config)
 
-        # Create CrawlerRunner and execute the Scrapy spider.
-        crawler_runner = CrawlerRunner(settings)
-        crawl_deferred = crawler_runner.crawl(
+        # Create AsyncCrawlerRunner and execute the Scrapy spider.
+        crawler_runner = AsyncCrawlerRunner(settings)
+        await crawler_runner.crawl(
             Spider,
             start_urls=start_urls,
             allowed_domains=allowed_domains,
         )
-        await deferred_to_future(crawl_deferred)