Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/firecrawl/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ tags:
- search
- utilities
type: plugin
version: 0.0.9
version: 0.1.0
3 changes: 2 additions & 1 deletion tools/firecrawl/tools/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
payload["excludePaths"] = get_array_params(tool_parameters, "excludePaths")
payload["includePaths"] = get_array_params(tool_parameters, "includePaths")
payload["maxDepth"] = tool_parameters.get("maxDepth")
payload["ignoreSitemap"] = tool_parameters.get("ignoreSitemap", False)
if tool_parameters.get("ignoreSitemap", False):
payload["sitemap"] = "skip"
payload["limit"] = tool_parameters.get("limit", 5)
payload["allowBackwardLinks"] = tool_parameters.get("allowBackwardLinks", False)
payload["allowExternalLinks"] = tool_parameters.get("allowExternalLinks", False)
Expand Down
10 changes: 5 additions & 5 deletions tools/firecrawl/tools/firecrawl_appx.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _request(
return None

def scrape_url(self, url: str, **kwargs):
endpoint = f"{self.base_url}/v1/scrape"
endpoint = f"{self.base_url}/v2/scrape"
data = {"url": url, **kwargs}
logger.debug(f"Sent request to {endpoint=} body={data}")
response = self._request("POST", endpoint, data)
Expand All @@ -55,7 +55,7 @@ def scrape_url(self, url: str, **kwargs):
return response

def map(self, url: str, **kwargs):
endpoint = f"{self.base_url}/v1/map"
endpoint = f"{self.base_url}/v2/map"
data = {"url": url, **kwargs}
logger.debug(f"Sent request to {endpoint=} body={data}")
response = self._request("POST", endpoint, data)
Expand All @@ -66,7 +66,7 @@ def map(self, url: str, **kwargs):
def crawl_url(
self, url: str, wait: bool = True, poll_interval: int = 5, idempotency_key: str | None = None, **kwargs
):
endpoint = f"{self.base_url}/v1/crawl"
endpoint = f"{self.base_url}/v2/crawl"
headers = self._prepare_headers(idempotency_key)
data = {"url": url, **kwargs}
logger.debug(f"Sent request to {endpoint=} body={data}")
Expand All @@ -81,14 +81,14 @@ def crawl_url(
return response

def check_crawl_status(self, job_id: str):
endpoint = f"{self.base_url}/v1/crawl/{job_id}"
endpoint = f"{self.base_url}/v2/crawl/{job_id}"
response = self._request("GET", endpoint)
if response is None:
raise HTTPError(f"Failed to check status for job {job_id} after multiple retries")
return response

def cancel_crawl_job(self, job_id: str):
endpoint = f"{self.base_url}/v1/crawl/{job_id}"
endpoint = f"{self.base_url}/v2/crawl/{job_id}"
response = self._request("DELETE", endpoint)
if response is None:
raise HTTPError(f"Failed to cancel job {job_id} after multiple retries")
Expand Down
3 changes: 2 additions & 1 deletion tools/firecrawl/tools/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
)
payload = {}
payload["search"] = tool_parameters.get("search")
payload["ignoreSitemap"] = tool_parameters.get("ignoreSitemap", True)
if tool_parameters.get("ignoreSitemap", True):
payload["sitemap"] = "skip"
payload["includeSubdomains"] = tool_parameters.get("includeSubdomains", False)
payload["limit"] = tool_parameters.get("limit", 5000)
map_result = app.map(url=tool_parameters["url"], **payload)
Expand Down
24 changes: 17 additions & 7 deletions tools/firecrawl/tools/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,29 @@ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessag
api_key=self.runtime.credentials.get("firecrawl_api_key"), base_url=self.runtime.credentials.get("base_url")
)
payload = {}
extract = {}
payload["formats"] = get_array_params(tool_parameters, "formats")
formats = get_array_params(tool_parameters, "formats") or []
payload["onlyMainContent"] = tool_parameters.get("onlyMainContent", True)
payload["includeTags"] = get_array_params(tool_parameters, "includeTags")
payload["excludeTags"] = get_array_params(tool_parameters, "excludeTags")
payload["headers"] = get_json_params(tool_parameters, "headers")
payload["waitFor"] = tool_parameters.get("waitFor", 0)
payload["timeout"] = tool_parameters.get("timeout", 30000)
extract["schema"] = get_json_params(tool_parameters, "schema")
extract["systemPrompt"] = tool_parameters.get("systemPrompt")
extract["prompt"] = tool_parameters.get("prompt")
extract = {k: v for (k, v) in extract.items() if v not in (None, "")}
payload["extract"] = extract or None
# v2: structured/LLM extraction is expressed as a "json" format object inside
# the formats array (the v1 top-level "extract" field was removed).
json_format = {"type": "json"}
json_format["schema"] = get_json_params(tool_parameters, "schema")
# v2 removed the json format's "systemPrompt" field (only "prompt"/"schema" remain),
# so fold any provided system prompt into the single "prompt" field.
system_prompt = tool_parameters.get("systemPrompt")
prompt = tool_parameters.get("prompt")
if system_prompt:
prompt = f"{system_prompt}\n\n{prompt}" if prompt else system_prompt
json_format["prompt"] = prompt
json_format = {k: v for (k, v) in json_format.items() if v not in (None, "")}
if len(json_format) > 1: # has more than just {"type": "json"}
formats = [f for f in formats if f != "extract"]
formats.append(json_format)
payload["formats"] = formats or None
payload = {k: v for (k, v) in payload.items() if v not in (None, "")}
crawl_result = app.scrape_url(url=tool_parameters["url"], **payload)
markdown_result = crawl_result.get("data", {}).get("markdown", "")
Expand Down