Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions templates/python-scrapy/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

-apify[scrapy] < 4.0.0
-scrapy < 3.0.0
+apify[scrapy] >= 3.3.0, < 4.0.0
+scrapy >= 2.14.0, < 3.0.0
7 changes: 0 additions & 7 deletions templates/python-scrapy/src/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,8 @@
We recommend you do not modify this file unless you really know what you are doing.
"""

-# ruff: noqa: E402
from __future__ import annotations

-from scrapy.utils.reactor import install_reactor
-
-# Install Twisted's asyncio reactor before importing any other Twisted or
-# Scrapy components.
-install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
-
import os

from apify.scrapy import initialize_logging, run_scrapy_actor
Expand Down
10 changes: 4 additions & 6 deletions templates/python-scrapy/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@

from apify import Actor
from apify.scrapy import apply_apify_settings
-from scrapy.crawler import CrawlerRunner
-from scrapy.utils.defer import deferred_to_future
+from scrapy.crawler import AsyncCrawlerRunner

# Import your Scrapy spider here.
from .spiders import TitleSpider as Spider
Expand All @@ -42,11 +41,10 @@ async def main() -> None:
# Apply Apify settings, which will override the Scrapy project settings.
settings = apply_apify_settings(proxy_config=proxy_config)

-# Create CrawlerRunner and execute the Scrapy spider.
-crawler_runner = CrawlerRunner(settings)
-crawl_deferred = crawler_runner.crawl(
+# Create AsyncCrawlerRunner and execute the Scrapy spider.
+crawler_runner = AsyncCrawlerRunner(settings)
+await crawler_runner.crawl(
Spider,
start_urls=start_urls,
allowed_domains=allowed_domains,
)
-await deferred_to_future(crawl_deferred)
2 changes: 2 additions & 0 deletions templates/python-scrapy/src/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
TELNETCONSOLE_ENABLED = False
# Do not change the Twisted reactor unless you really know what you are doing.
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
+HTTPCACHE_ENABLED = True
+HTTPCACHE_EXPIRATION_SECS = 7200
ITEM_PIPELINES = {
'src.pipelines.TitleItemPipeline': 123,
}
Expand Down
4 changes: 2 additions & 2 deletions wrappers/python-scrapy/requirements_apify.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
-apify[scrapy] < 3.0
-scrapy < 3.0
+apify[scrapy] >= 3.3.0, < 4.0.0
+scrapy >= 2.14.0, < 3.0.0
7 changes: 0 additions & 7 deletions wrappers/python-scrapy/{projectFolder}/__main__.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,9 @@

We recommend you do not modify this file unless you really know what you are doing.
"""
-# ruff: noqa: E402

from __future__ import annotations

-from scrapy.utils.reactor import install_reactor
-
-# Install Twisted's asyncio reactor before importing any other Twisted or
-# Scrapy components.
-install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
-
import os

from apify.scrapy import initialize_logging, run_scrapy_actor
Expand Down
10 changes: 4 additions & 6 deletions wrappers/python-scrapy/{projectFolder}/main.template.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@

from apify import Actor
from apify.scrapy import apply_apify_settings
-from scrapy.crawler import CrawlerRunner
-from scrapy.utils.defer import deferred_to_future
+from scrapy.crawler import AsyncCrawlerRunner

# Import your Scrapy spider here.
from {{spider_module_name}} import {{spider_class_name}} as Spider
Expand All @@ -44,11 +43,10 @@ async def main() -> None:
# Apply Apify settings, which will override the Scrapy project settings.
settings = apply_apify_settings(proxy_config=proxy_config)

-# Create CrawlerRunner and execute the Scrapy spider.
-crawler_runner = CrawlerRunner(settings)
-crawl_deferred = crawler_runner.crawl(
+# Create AsyncCrawlerRunner and execute the Scrapy spider.
+crawler_runner = AsyncCrawlerRunner(settings)
+await crawler_runner.crawl(
Spider,
start_urls=start_urls,
allowed_domains=allowed_domains,
)
-await deferred_to_future(crawl_deferred)