chore(backend/deps): Bump firecrawl-py from 2.16.3 to 4.3.1 in /autogpt_platform/backend (#10809)

Bumps [firecrawl-py](https://github.com/firecrawl/firecrawl) from 2.16.3
to 4.3.1.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a
href="https://github.com/firecrawl/firecrawl/commits">compare
view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=firecrawl-py&package-manager=pip&previous-version=2.16.3&new-version=4.3.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits
that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after
your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge
and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating
it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all
of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop
Dependabot creating any more for this major version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop
Dependabot creating any more for this minor version (unless you reopen
the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop
Dependabot creating any more for this dependency (unless you reopen the
PR or upgrade to it yourself)


</details>

<!-- CURSOR_SUMMARY -->
---

> [!NOTE]
> Upgrade `firecrawl-py` to v4.3.6 and migrate the Firecrawl blocks to the
v4 API: format handling, method names, and response fields.
> 
> - **Dependencies**
> - Bump `firecrawl-py` from `2.16.3` to `4.3.6` (adds an `httpx`
dependency and tightens the `pydantic` requirement to `>=2.0`).
> - **Firecrawl API migration**
>   - Centralize `ScrapeFormat` in `backend/blocks/firecrawl/_api.py`.
> - Add `_format_utils.convert_to_format_options` to map `ScrapeFormat`
(incl. `screenshot@fullPage`) to v4 `FormatOption`/`ScreenshotFormat`.
> - Switch to v4 types (`firecrawl.v2.types.ScrapeOptions`); adopt
snake_case fields (`only_main_content`, `max_age`, `wait_for`).
> - Rename methods: `crawl_url` → `crawl`, `scrape_url` → `scrape`,
`map_url` → `map`.
> - Normalize response attributes: `rawHtml` → `raw_html`,
`changeTracking` → `change_tracking`.
> - **Blocks**
> - `crawl.py`, `scrape.py`, `search.py`: use new formats conversion and
updated options/fields; adjust iteration over results (`search`: iterate
`web` when present).
> - `map.py`: return both `links` and detailed `results`
(url/title/description) and update output schema accordingly.
> - **Project files**
> - Update `pyproject.toml` and `poetry.lock` for new dependency
versions.
> 
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
d872f2e82b. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->

> **Note**
> Automatic rebases have been disabled on this pull request as it has
been open for over 30 days.

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Nicholas Tindle <ntindle@users.noreply.github.com>
Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
dependabot[bot]
2025-10-02 15:14:18 -05:00
committed by GitHub
parent 57a06f7088
commit f5ee579ab2
9 changed files with 93 additions and 73 deletions

View File

@@ -0,0 +1,12 @@
from enum import Enum
class ScrapeFormat(Enum):
    """Output formats that the Firecrawl blocks can request.

    Each member's value is the format identifier string. Members are
    declared in the order they should appear when the enum is iterated.
    """

    MARKDOWN = "markdown"
    HTML = "html"
    # camelCase identifier; the member name uses the snake_case spelling.
    RAW_HTML = "rawHtml"
    LINKS = "links"
    SCREENSHOT = "screenshot"
    # Distinct identifier for the full-page variant of a screenshot.
    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
    JSON = "json"
    # camelCase identifier; the member name uses the snake_case spelling.
    CHANGE_TRACKING = "changeTracking"

View File

@@ -0,0 +1,28 @@
"""Utility functions for converting between our ScrapeFormat enum and firecrawl FormatOption types."""
from typing import List
from firecrawl.v2.types import FormatOption, ScreenshotFormat
from backend.blocks.firecrawl._api import ScrapeFormat
def convert_to_format_options(
    formats: List[ScrapeFormat],
) -> List[FormatOption]:
    """Translate ScrapeFormat members into firecrawl FormatOption values.

    Every member is passed through as its plain string value, except the
    member whose value is ``"screenshot@fullPage"``: that one is replaced
    by a ``ScreenshotFormat`` object with ``full_page=True``.

    Args:
        formats: The formats to convert.

    Returns:
        A new list holding one converted entry per input format, in the
        same order as the input.
    """
    converted: List[FormatOption] = [
        (
            # The full-page screenshot is expressed as an object rather
            # than a string literal.
            ScreenshotFormat(type="screenshot", full_page=True)
            if fmt.value == "screenshot@fullPage"
            else fmt.value
        )
        for fmt in formats
    ]
    return converted

View File

@@ -1,8 +1,9 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp, ScrapeOptions
from firecrawl import FirecrawlApp
from firecrawl.v2.types import ScrapeOptions
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -14,21 +15,10 @@ from backend.sdk import (
)
from ._config import firecrawl
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
from ._format_utils import convert_to_format_options
class FirecrawlCrawlBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The URL to crawl")
@@ -78,18 +68,17 @@ class FirecrawlCrawlBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
crawl_result = app.crawl_url(
crawl_result = app.crawl(
input_data.url,
limit=input_data.limit,
scrape_options=ScrapeOptions(
formats=[format.value for format in input_data.formats],
onlyMainContent=input_data.only_main_content,
maxAge=input_data.max_age,
waitFor=input_data.wait_for,
formats=convert_to_format_options(input_data.formats),
only_main_content=input_data.only_main_content,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
),
)
yield "data", crawl_result.data
@@ -101,7 +90,7 @@ class FirecrawlCrawlBlock(Block):
elif f == ScrapeFormat.HTML:
yield "html", data.html
elif f == ScrapeFormat.RAW_HTML:
yield "raw_html", data.rawHtml
yield "raw_html", data.raw_html
elif f == ScrapeFormat.LINKS:
yield "links", data.links
elif f == ScrapeFormat.SCREENSHOT:
@@ -109,6 +98,6 @@ class FirecrawlCrawlBlock(Block):
elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
yield "screenshot_full_page", data.screenshot
elif f == ScrapeFormat.CHANGE_TRACKING:
yield "change_tracking", data.changeTracking
yield "change_tracking", data.change_tracking
elif f == ScrapeFormat.JSON:
yield "json", data.json

View File

@@ -20,7 +20,6 @@ from ._config import firecrawl
@cost(BlockCost(2, BlockCostType.RUN))
class FirecrawlExtractBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
urls: list[str] = SchemaField(
@@ -53,7 +52,6 @@ class FirecrawlExtractBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
extract_result = app.extract(

View File

@@ -1,3 +1,5 @@
from typing import Any
from firecrawl import FirecrawlApp
from backend.sdk import (
@@ -14,14 +16,16 @@ from ._config import firecrawl
class FirecrawlMapWebsiteBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The website url to map")
class Output(BlockSchema):
links: list[str] = SchemaField(description="The links of the website")
links: list[str] = SchemaField(description="List of URLs found on the website")
results: list[dict[str, Any]] = SchemaField(
description="List of search results with url, title, and description"
)
def __init__(self):
super().__init__(
@@ -35,12 +39,22 @@ class FirecrawlMapWebsiteBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
map_result = app.map_url(
map_result = app.map(
url=input_data.url,
)
yield "links", map_result.links
# Convert SearchResult objects to dicts
results_data = [
{
"url": link.url,
"title": link.title,
"description": link.description,
}
for link in map_result.links
]
yield "links", [link.url for link in map_result.links]
yield "results", results_data

View File

@@ -1,8 +1,8 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -14,21 +14,10 @@ from backend.sdk import (
)
from ._config import firecrawl
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
from ._format_utils import convert_to_format_options
class FirecrawlScrapeBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
url: str = SchemaField(description="The URL to crawl")
@@ -78,12 +67,11 @@ class FirecrawlScrapeBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
scrape_result = app.scrape_url(
scrape_result = app.scrape(
input_data.url,
formats=[format.value for format in input_data.formats],
formats=convert_to_format_options(input_data.formats),
only_main_content=input_data.only_main_content,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
@@ -96,7 +84,7 @@ class FirecrawlScrapeBlock(Block):
elif f == ScrapeFormat.HTML:
yield "html", scrape_result.html
elif f == ScrapeFormat.RAW_HTML:
yield "raw_html", scrape_result.rawHtml
yield "raw_html", scrape_result.raw_html
elif f == ScrapeFormat.LINKS:
yield "links", scrape_result.links
elif f == ScrapeFormat.SCREENSHOT:
@@ -104,6 +92,6 @@ class FirecrawlScrapeBlock(Block):
elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
yield "screenshot_full_page", scrape_result.screenshot
elif f == ScrapeFormat.CHANGE_TRACKING:
yield "change_tracking", scrape_result.changeTracking
yield "change_tracking", scrape_result.change_tracking
elif f == ScrapeFormat.JSON:
yield "json", scrape_result.json

View File

@@ -1,8 +1,9 @@
from enum import Enum
from typing import Any
from firecrawl import FirecrawlApp, ScrapeOptions
from firecrawl import FirecrawlApp
from firecrawl.v2.types import ScrapeOptions
from backend.blocks.firecrawl._api import ScrapeFormat
from backend.sdk import (
APIKeyCredentials,
Block,
@@ -14,21 +15,10 @@ from backend.sdk import (
)
from ._config import firecrawl
class ScrapeFormat(Enum):
MARKDOWN = "markdown"
HTML = "html"
RAW_HTML = "rawHtml"
LINKS = "links"
SCREENSHOT = "screenshot"
SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
JSON = "json"
CHANGE_TRACKING = "changeTracking"
from ._format_utils import convert_to_format_options
class FirecrawlSearchBlock(Block):
class Input(BlockSchema):
credentials: CredentialsMetaInput = firecrawl.credentials_field()
query: str = SchemaField(description="The query to search for")
@@ -61,7 +51,6 @@ class FirecrawlSearchBlock(Block):
async def run(
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
) -> BlockOutput:
app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())
# Sync call
@@ -69,11 +58,12 @@ class FirecrawlSearchBlock(Block):
input_data.query,
limit=input_data.limit,
scrape_options=ScrapeOptions(
formats=[format.value for format in input_data.formats],
maxAge=input_data.max_age,
waitFor=input_data.wait_for,
formats=convert_to_format_options(input_data.formats) or None,
max_age=input_data.max_age,
wait_for=input_data.wait_for,
),
)
yield "data", scrape_result
for site in scrape_result.data:
yield "site", site
if hasattr(scrape_result, "web") and scrape_result.web:
for site in scrape_result.web:
yield "site", site

View File

@@ -1339,20 +1339,21 @@ packaging = ">=20"
[[package]]
name = "firecrawl-py"
version = "2.16.3"
version = "4.3.6"
description = "Python SDK for Firecrawl API"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "firecrawl_py-2.16.3-py3-none-any.whl", hash = "sha256:94bb46af5e0df6c8ec414ac999a5355c0f5a46f15fd1cf5a02a3b31062db0aa8"},
{file = "firecrawl_py-2.16.3.tar.gz", hash = "sha256:5fd063ef4acc4c4be62648f1e11467336bc127780b3afc28d39078a012e6a14c"},
{file = "firecrawl_py-4.3.6-py3-none-any.whl", hash = "sha256:9b5dffdf5ed08fdbf0966f17e18c1a034d59f42a20b2bf9a6291a83190d7eb0f"},
{file = "firecrawl_py-4.3.6.tar.gz", hash = "sha256:303827a86d0f6237a8ddcaa0bcdaa4c5ee11d9a4880b0685302b8d9a0e191ee0"},
]
[package.dependencies]
aiohttp = "*"
httpx = "*"
nest-asyncio = "*"
pydantic = "*"
pydantic = ">=2.0"
python-dotenv = "*"
requests = "*"
websockets = "*"
@@ -7273,4 +7274,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
content-hash = "a19fcce9d4ab88f14eb1e5baa83e1e5a90c9995b04b84dae7cfa6257cf19012c"
content-hash = "2c9e0e6e1b4b7a5293cc4c9296937479e7c52c7d8d13315c76e3ae0e7833465d"

View File

@@ -78,7 +78,7 @@ aioclamd = "^1.0.0"
setuptools = "^80.9.0"
gcloud-aio-storage = "^9.5.0"
pandas = "^2.3.1"
firecrawl-py = "^2.16.3"
firecrawl-py = "^4.3.6"
exa-py = "^1.14.20"
croniter = "^6.0.0"
stagehand = "^0.5.1"