From 206f46203ae6b06c3ce0349ddfc7f424f82bc0b8 Mon Sep 17 00:00:00 2001 From: rramprakash Date: Thu, 4 Jun 2026 12:47:56 +0530 Subject: [PATCH 1/4] firecrawl: upgrade plugin from v1 to v2 API Switch the custom HTTP client (firecrawl_appx.py) from the legacy /v1/* endpoints to /v2/* endpoints, mirroring the paths Dify's own core extractor (api/core/rag/extractor/firecrawl/firecrawl_app.py) already uses: v2/scrape, v2/crawl, v2/map, v2/crawl/{id}. Scrape tool: in v2 the top-level "extract" field was removed; structured / LLM extraction is now expressed as a {"type": "json", schema, prompt, systemPrompt} object inside the formats array. The scrape tool now builds that json format object from the existing schema/systemPrompt/prompt inputs, so the user-facing parameters and outputs are preserved. Bump plugin version 0.0.9 -> 0.1.0. Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/firecrawl/manifest.yaml | 2 +- tools/firecrawl/tools/firecrawl_appx.py | 10 +++++----- tools/firecrawl/tools/scrape.py | 19 ++++++++++++------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/tools/firecrawl/manifest.yaml b/tools/firecrawl/manifest.yaml index 1e4fee805..93f78bfec 100644 --- a/tools/firecrawl/manifest.yaml +++ b/tools/firecrawl/manifest.yaml @@ -32,4 +32,4 @@ tags: - search - utilities type: plugin -version: 0.0.9 +version: 0.1.0 diff --git a/tools/firecrawl/tools/firecrawl_appx.py b/tools/firecrawl/tools/firecrawl_appx.py index 0d19180a8..0f1fe419e 100644 --- a/tools/firecrawl/tools/firecrawl_appx.py +++ b/tools/firecrawl/tools/firecrawl_appx.py @@ -46,7 +46,7 @@ def _request( return None def scrape_url(self, url: str, **kwargs): - endpoint = f"{self.base_url}/v1/scrape" + endpoint = f"{self.base_url}/v2/scrape" data = {"url": url, **kwargs} logger.debug(f"Sent request to {endpoint=} body={data}") response = self._request("POST", endpoint, data) @@ -55,7 +55,7 @@ def scrape_url(self, url: str, **kwargs): return response def map(self, url: str, **kwargs): - endpoint = f"{self.base_url}/v1/map" + endpoint = f"{self.base_url}/v2/map" data = {"url": url, **kwargs} logger.debug(f"Sent request to {endpoint=} body={data}") response = self._request("POST", endpoint, data) @@ -66,7 +66,7 @@ def map(self, url: str, **kwargs): def crawl_url( self, url: str, wait: bool = True, poll_interval: int = 5, idempotency_key: str | None = None, **kwargs ): - endpoint = f"{self.base_url}/v1/crawl" + endpoint = f"{self.base_url}/v2/crawl" headers = self._prepare_headers(idempotency_key) data = {"url": url, **kwargs} logger.debug(f"Sent request to {endpoint=} body={data}") @@ -81,14 +81,14 @@ def crawl_url( return response def check_crawl_status(self, job_id: str): - endpoint = f"{self.base_url}/v1/crawl/{job_id}" + endpoint = f"{self.base_url}/v2/crawl/{job_id}" response = self._request("GET", endpoint) if response is None: raise HTTPError(f"Failed to check status for job {job_id} after multiple retries") return response def cancel_crawl_job(self, job_id: str): - endpoint = f"{self.base_url}/v1/crawl/{job_id}" + endpoint = f"{self.base_url}/v2/crawl/{job_id}" response = self._request("DELETE", endpoint) if response is None: raise HTTPError(f"Failed to cancel job {job_id} after multiple retries") diff --git a/tools/firecrawl/tools/scrape.py b/tools/firecrawl/tools/scrape.py index 49c57768e..c5b933920 100644 --- a/tools/firecrawl/tools/scrape.py +++ b/tools/firecrawl/tools/scrape.py @@ -14,19 +14,24 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag api_key=self.runtime.credentials.get("firecrawl_api_key"), base_url=self.runtime.credentials.get("base_url") ) payload = {} - extract = {} - payload["formats"] = get_array_params(tool_parameters, "formats") + formats = get_array_params(tool_parameters, "formats") or [] payload["onlyMainContent"] = tool_parameters.get("onlyMainContent", True) payload["includeTags"] = get_array_params(tool_parameters, "includeTags") payload["excludeTags"] = get_array_params(tool_parameters, "excludeTags") payload["headers"] = get_json_params(tool_parameters, "headers") payload["waitFor"] = tool_parameters.get("waitFor", 0) payload["timeout"] = tool_parameters.get("timeout", 30000) - extract["schema"] = get_json_params(tool_parameters, "schema") - extract["systemPrompt"] = tool_parameters.get("systemPrompt") - extract["prompt"] = tool_parameters.get("prompt") - extract = {k: v for (k, v) in extract.items() if v not in (None, "")} - payload["extract"] = extract or None + # v2: structured/LLM extraction is expressed as a "json" format object inside + # the formats array (the v1 top-level "extract" field was removed). + json_format = {"type": "json"} + json_format["schema"] = get_json_params(tool_parameters, "schema") + json_format["systemPrompt"] = tool_parameters.get("systemPrompt") + json_format["prompt"] = tool_parameters.get("prompt") + json_format = {k: v for (k, v) in json_format.items() if v not in (None, "")} + if len(json_format) > 1: # has more than just {"type": "json"} + formats = [f for f in formats if f != "extract"] + formats.append(json_format) + payload["formats"] = formats or None payload = {k: v for (k, v) in payload.items() if v not in (None, "")} crawl_result = app.scrape_url(url=tool_parameters["url"], **payload) markdown_result = crawl_result.get("data", {}).get("markdown", "") From a8f4fe9704a1b4f27440e6307e8aa1200eecd3ec Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:28:09 +0530 Subject: [PATCH 2/4] =?UTF-8?q?fix(firecrawl):=20v2=20json=20format=20has?= =?UTF-8?q?=20no=20systemPrompt=20=E2=80=94=20fold=20it=20into=20prompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2 JsonFormat only supports {type, prompt, schema}; the carried-over systemPrompt would be silently ignored. Merge any system prompt into the single prompt field. Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/firecrawl/tools/scrape.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/firecrawl/tools/scrape.py b/tools/firecrawl/tools/scrape.py index c5b933920..8dc2d8cdb 100644 --- a/tools/firecrawl/tools/scrape.py +++ b/tools/firecrawl/tools/scrape.py @@ -25,8 +25,13 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag # the formats array (the v1 top-level "extract" field was removed). json_format = {"type": "json"} json_format["schema"] = get_json_params(tool_parameters, "schema") - json_format["systemPrompt"] = tool_parameters.get("systemPrompt") - json_format["prompt"] = tool_parameters.get("prompt") + # v2 removed the json format's "systemPrompt" field (only "prompt"/"schema" remain), + # so fold any provided system prompt into the single "prompt" field. + system_prompt = tool_parameters.get("systemPrompt") + prompt = tool_parameters.get("prompt") + if system_prompt: + prompt = f"{system_prompt}\n\n{prompt}" if prompt else system_prompt + json_format["prompt"] = prompt json_format = {k: v for (k, v) in json_format.items() if v not in (None, "")} if len(json_format) > 1: # has more than just {"type": "json"} formats = [f for f in formats if f != "extract"] From df83332377b4d503280ccd8edd4433bb9a243536 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:33:17 +0530 Subject: [PATCH 3/4] fix(firecrawl): map v1 ignoreSitemap -> v2 sitemap enum (v2 removed ignore_sitemap) Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/firecrawl/tools/crawl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/firecrawl/tools/crawl.py b/tools/firecrawl/tools/crawl.py index a6e912e12..4d106ed6a 100644 --- a/tools/firecrawl/tools/crawl.py +++ b/tools/firecrawl/tools/crawl.py @@ -20,7 +20,8 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag payload["excludePaths"] = get_array_params(tool_parameters, "excludePaths") payload["includePaths"] = get_array_params(tool_parameters, "includePaths") payload["maxDepth"] = tool_parameters.get("maxDepth") - payload["ignoreSitemap"] = tool_parameters.get("ignoreSitemap", False) + if tool_parameters.get("ignoreSitemap", False): + payload["sitemap"] = "skip" payload["limit"] = tool_parameters.get("limit", 5) payload["allowBackwardLinks"] = tool_parameters.get("allowBackwardLinks", False) payload["allowExternalLinks"] = tool_parameters.get("allowExternalLinks", False) From d5af790bd789e685e18318fdc4f1e1d3ae902a8a Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:33:19 +0530 Subject: [PATCH 4/4] fix(firecrawl): map v1 ignoreSitemap -> v2 sitemap enum (v2 removed ignore_sitemap) Co-Authored-By: Claude Opus 4.8 (1M context) --- tools/firecrawl/tools/map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/firecrawl/tools/map.py b/tools/firecrawl/tools/map.py index 199280f5b..cd5baccf6 100644 --- a/tools/firecrawl/tools/map.py +++ b/tools/firecrawl/tools/map.py @@ -16,7 +16,8 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag ) payload = {} payload["search"] = tool_parameters.get("search") - payload["ignoreSitemap"] = tool_parameters.get("ignoreSitemap", True) + if tool_parameters.get("ignoreSitemap", True): + payload["sitemap"] = "skip" payload["includeSubdomains"] = tool_parameters.get("includeSubdomains", False) payload["limit"] = tool_parameters.get("limit", 5000) map_result = app.map(url=tool_parameters["url"], **payload)