Compare commits

...

3 Commits

Author  SHA1        Message                                                                       Date
Swifty  3463db5106  Merge branch 'dev' into swiftyos/automat-69-add-youtube-support-to-ayrshire  2025-07-31 10:59:15 +02:00
Swifty  02bf557609  fixing and enabling post to youtube block                                     2025-07-31 10:54:20 +02:00
Swifty  369dcde782  Add firecrawl integration                                                     2025-07-30 15:21:07 +02:00
11 changed files with 494 additions and 34 deletions

View File

@@ -1,3 +1,4 @@
+from enum import Enum
 from typing import Any
 
 from backend.integrations.ayrshare import PostIds, PostResponse, SocialPlatform
@@ -14,6 +15,12 @@ from backend.sdk import (
 from ._util import BaseAyrshareInput, create_ayrshare_client
 
 
+class YouTubeVisibility(str, Enum):
+    PRIVATE = "private"
+    PUBLIC = "public"
+    UNLISTED = "unlisted"
+
+
 class PostToYouTubeBlock(Block):
     """Block for posting to YouTube with YouTube-specific options."""
@@ -23,7 +30,6 @@ class PostToYouTubeBlock(Block):
         # Override post field to include YouTube-specific information
         post: str = SchemaField(
             description="Video description (max 5,000 chars, empty string allowed). Cannot contain < or > characters.",
-            default="",
             advanced=False,
         )
@@ -37,55 +43,54 @@ class PostToYouTubeBlock(Block):
         # YouTube-specific required options
         title: str = SchemaField(
             description="Video title (max 100 chars, required). Cannot contain < or > characters.",
             default="",
             advanced=False,
         )
 
         # YouTube-specific optional options
-        visibility: str = SchemaField(
-            description="Video visibility: 'private' (default), 'public', or 'unlisted'",
-            default="private",
-            advanced=True,
+        visibility: YouTubeVisibility = SchemaField(
+            description="Video visibility: 'private' (default), 'public' , or 'unlisted'",
+            default=YouTubeVisibility.PRIVATE,
+            advanced=False,
         )
-        thumbnail: str = SchemaField(
+        thumbnail: str | None = SchemaField(
             description="Thumbnail URL (JPEG/PNG under 2MB, must end in .png/.jpg/.jpeg). Requires phone verification.",
-            default="",
+            default=None,
             advanced=True,
         )
-        playlist_id: str = SchemaField(
+        playlist_id: str | None = SchemaField(
             description="Playlist ID to add video (user must own playlist)",
-            default="",
+            default=None,
             advanced=True,
         )
-        tags: list[str] = SchemaField(
+        tags: list[str] | None = SchemaField(
             description="Video tags (min 2 chars each, max 500 chars total)",
-            default_factory=list,
+            default=None,
             advanced=True,
         )
-        made_for_kids: bool = SchemaField(
-            description="Self-declared kids content", default=False, advanced=True
+        made_for_kids: bool | None = SchemaField(
+            description="Self-declared kids content", default=None, advanced=True
        )
-        is_shorts: bool = SchemaField(
+        is_shorts: bool | None = SchemaField(
             description="Post as YouTube Short (max 3 minutes, adds #shorts)",
-            default=False,
+            default=None,
             advanced=True,
         )
-        notify_subscribers: bool = SchemaField(
-            description="Send notification to subscribers", default=True, advanced=True
+        notify_subscribers: bool | None = SchemaField(
+            description="Send notification to subscribers", default=None, advanced=True
         )
-        category_id: int = SchemaField(
+        category_id: int | None = SchemaField(
             description="Video category ID (e.g., 24 = Entertainment)",
-            default=0,
+            default=None,
             advanced=True,
         )
-        contains_synthetic_media: bool = SchemaField(
+        contains_synthetic_media: bool | None = SchemaField(
             description="Disclose realistic AI/synthetic content",
-            default=False,
+            default=None,
             advanced=True,
         )
-        publish_at: str = SchemaField(
+        publish_at: str | None = SchemaField(
             description="UTC publish time (YouTube controlled, format: 2022-10-08T21:18:36Z)",
-            default="",
+            default=None,
             advanced=True,
         )
 
         # YouTube targeting options (flattened from YouTubeTargeting object)
@@ -99,19 +104,19 @@ class PostToYouTubeBlock(Block):
             default=None,
             advanced=True,
         )
-        subtitle_url: str = SchemaField(
+        subtitle_url: str | None = SchemaField(
             description="URL to SRT or SBV subtitle file (must be HTTPS and end in .srt/.sbv, under 100MB)",
-            default="",
+            default=None,
             advanced=True,
         )
-        subtitle_language: str = SchemaField(
+        subtitle_language: str | None = SchemaField(
             description="Language code for subtitles (default: 'en')",
-            default="en",
+            default=None,
             advanced=True,
         )
-        subtitle_name: str = SchemaField(
+        subtitle_name: str | None = SchemaField(
             description="Name of caption track (max 150 chars, default: 'English')",
-            default="English",
+            default=None,
             advanced=True,
         )
@@ -121,7 +126,6 @@ class PostToYouTubeBlock(Block):
     def __init__(self):
         super().__init__(
-            disabled=True,
             id="0082d712-ff1b-4c3d-8a8d-6c7721883b83",
             description="Post to YouTube using Ayrshare",
             categories={BlockCategory.SOCIAL},
@@ -219,7 +223,7 @@ class PostToYouTubeBlock(Block):
                 yield "error", "YouTube subtitle URL must end in .srt or .sbv"
                 return
-        if len(input_data.subtitle_name) > 150:
+        if input_data.subtitle_name and len(input_data.subtitle_name) > 150:
             yield "error", f"YouTube subtitle name exceeds 150 character limit ({len(input_data.subtitle_name)} characters)"
             return
@@ -258,7 +262,7 @@ class PostToYouTubeBlock(Block):
         if not input_data.notify_subscribers:
             youtube_options["notifySubscribers"] = False
-        if input_data.category_id > 0:
+        if input_data.category_id and input_data.category_id > 0:
             youtube_options["categoryId"] = input_data.category_id
         if input_data.contains_synthetic_media:
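
Why the two guard changes above matter: once these inputs default to None instead of "" / 0 / False, the old unguarded comparisons no longer just evaluate falsy — they raise. A minimal sketch of the failure mode in plain Python (hypothetical values, not code from this PR):

subtitle_name = None   # the new default
category_id = None

# Unguarded checks now raise on Python 3:
#   len(subtitle_name) > 150  -> TypeError: object of type 'NoneType' has no len()
#   category_id > 0           -> TypeError: '>' not supported between instances of 'NoneType' and 'int'

# The guarded forms added in this diff short-circuit safely:
if subtitle_name and len(subtitle_name) > 150:
    print("subtitle name too long")
if category_id and category_id > 0:
    print("category set")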

View File

@@ -0,0 +1,8 @@
from backend.sdk import BlockCostType, ProviderBuilder

firecrawl = (
    ProviderBuilder("firecrawl")
    .with_api_key("FIRECRAWL_API_KEY", "Firecrawl API Key")
    .with_base_cost(1, BlockCostType.RUN)
    .build()
)
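
This shared provider is what every Firecrawl block below imports from ._config: it registers the FIRECRAWL_API_KEY credential and a base cost of 1 credit per run (which FirecrawlExtractBlock later overrides to 2 via @cost). A minimal sketch of the consumption pattern, mirroring the blocks in this diff:

from backend.sdk import BlockSchema, CredentialsMetaInput, SchemaField

from ._config import firecrawl

class Input(BlockSchema):
    # credentials_field() wires the user's Firecrawl API key into the block
    credentials: CredentialsMetaInput = firecrawl.credentials_field()
    url: str = SchemaField(description="The URL to crawl")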

View File

@@ -0,0 +1,114 @@
from enum import Enum
from typing import Any

from firecrawl import FirecrawlApp, ScrapeOptions

from backend.sdk import (
    APIKeyCredentials,
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchema,
    CredentialsMetaInput,
    SchemaField,
)

from ._config import firecrawl


class ScrapeFormat(Enum):
    MARKDOWN = "markdown"
    HTML = "html"
    RAW_HTML = "rawHtml"
    LINKS = "links"
    SCREENSHOT = "screenshot"
    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
    JSON = "json"
    CHANGE_TRACKING = "changeTracking"


class FirecrawlCrawlBlock(Block):
    class Input(BlockSchema):
        credentials: CredentialsMetaInput = firecrawl.credentials_field()
        url: str = SchemaField(description="The URL to crawl")
        limit: int = SchemaField(description="The number of pages to crawl", default=10)
        only_main_content: bool = SchemaField(
            description="Only return the main content of the page excluding headers, navs, footers, etc.",
            default=True,
        )
        max_age: int = SchemaField(
            description="The maximum age of the page in milliseconds - default is 1 hour",
            default=3600000,
        )
        wait_for: int = SchemaField(
            description="Specify a delay in milliseconds before fetching the content, allowing the page sufficient time to load.",
            default=0,
        )
        formats: list[ScrapeFormat] = SchemaField(
            description="The format of the crawl", default=[ScrapeFormat.MARKDOWN]
        )

    class Output(BlockSchema):
        data: list[dict[str, Any]] = SchemaField(description="The result of the crawl")
        markdown: str = SchemaField(description="The markdown of the crawl")
        html: str = SchemaField(description="The html of the crawl")
        raw_html: str = SchemaField(description="The raw html of the crawl")
        links: list[str] = SchemaField(description="The links of the crawl")
        screenshot: str = SchemaField(description="The screenshot of the crawl")
        screenshot_full_page: str = SchemaField(
            description="The screenshot full page of the crawl"
        )
        json_data: dict[str, Any] = SchemaField(
            description="The json data of the crawl"
        )
        change_tracking: dict[str, Any] = SchemaField(
            description="The change tracking of the crawl"
        )

    def __init__(self):
        super().__init__(
            id="bdbbaba0-03b7-4971-970e-699e2de6015e",
            description="Firecrawl crawls websites to extract comprehensive data while bypassing blockers.",
            categories={BlockCategory.SEARCH},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

        # Sync call
        crawl_result = app.crawl_url(
            input_data.url,
            limit=input_data.limit,
            scrape_options=ScrapeOptions(
                formats=[format.value for format in input_data.formats],
                onlyMainContent=input_data.only_main_content,
                maxAge=input_data.max_age,
                waitFor=input_data.wait_for,
            ),
        )
        yield "data", crawl_result.data

        for data in crawl_result.data:
            for f in input_data.formats:
                if f == ScrapeFormat.MARKDOWN:
                    yield "markdown", data.markdown
                elif f == ScrapeFormat.HTML:
                    yield "html", data.html
                elif f == ScrapeFormat.RAW_HTML:
                    yield "raw_html", data.rawHtml
                elif f == ScrapeFormat.LINKS:
                    yield "links", data.links
                elif f == ScrapeFormat.SCREENSHOT:
                    yield "screenshot", data.screenshot
                elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
                    yield "screenshot_full_page", data.screenshot
                elif f == ScrapeFormat.CHANGE_TRACKING:
                    yield "change_tracking", data.changeTracking
                elif f == ScrapeFormat.JSON:
                    yield "json", data.json
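
For reference, the SDK call this block wraps can be exercised standalone. A minimal sketch, assuming FIRECRAWL_API_KEY is exported and reusing the argument shape from run() above (the URL and limit are hypothetical):

import os

from firecrawl import FirecrawlApp, ScrapeOptions

app = FirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
result = app.crawl_url(
    "https://example.com",  # hypothetical target
    limit=5,
    scrape_options=ScrapeOptions(formats=["markdown"], onlyMainContent=True),
)
for page in result.data:  # one document per crawled page
    print(page.markdown)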

View File

@@ -0,0 +1,67 @@
from typing import Any

from firecrawl import FirecrawlApp

from backend.sdk import (
    APIKeyCredentials,
    Block,
    BlockCategory,
    BlockCost,
    BlockCostType,
    BlockOutput,
    BlockSchema,
    CredentialsMetaInput,
    SchemaField,
    cost,
)

from ._config import firecrawl


@cost(BlockCost(2, BlockCostType.RUN))
class FirecrawlExtractBlock(Block):
    class Input(BlockSchema):
        credentials: CredentialsMetaInput = firecrawl.credentials_field()
        urls: list[str] = SchemaField(
            description="The URLs to crawl - at least one is required. Wildcards are supported. (/*)"
        )
        prompt: str | None = SchemaField(
            description="The prompt to use for the crawl", default=None, advanced=False
        )
        output_schema: str | None = SchemaField(
            description="A more rigid structure if you already know the JSON layout.",
            default=None,
        )
        enable_web_search: bool = SchemaField(
            description="When true, extraction can follow links outside the specified domain.",
            default=False,
        )

    class Output(BlockSchema):
        data: dict[str, Any] = SchemaField(description="The result of the crawl")

    def __init__(self):
        super().__init__(
            id="d1774756-4d9e-40e6-bab1-47ec0ccd81b2",
            description="Firecrawl crawls websites to extract comprehensive data while bypassing blockers.",
            categories={BlockCategory.SEARCH},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

        # Sync call
        extract_result = app.extract(
            urls=input_data.urls,
            prompt=input_data.prompt,
            schema=input_data.output_schema,
            enable_web_search=input_data.enable_web_search,
        )
        yield "data", extract_result.data
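
Note the @cost(BlockCost(2, BlockCostType.RUN)) decorator: extraction bills 2 credits per run instead of the provider's base cost of 1 set in _config.py. A standalone sketch of the wrapped call, under the same assumptions as the crawl example above (the URLs and prompt are hypothetical):

import os

from firecrawl import FirecrawlApp

app = FirecrawlApp(api_key=os.environ["FIRECRAWL_API_KEY"])
result = app.extract(
    urls=["https://example.com/*"],  # wildcard form, as the field description allows
    prompt="List every product name and price.",  # hypothetical prompt
    enable_web_search=False,
)
print(result.data)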

View File

@@ -0,0 +1,46 @@
from firecrawl import FirecrawlApp

from backend.sdk import (
    APIKeyCredentials,
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchema,
    CredentialsMetaInput,
    SchemaField,
)

from ._config import firecrawl


class FirecrawlMapWebsiteBlock(Block):
    class Input(BlockSchema):
        credentials: CredentialsMetaInput = firecrawl.credentials_field()
        url: str = SchemaField(description="The website url to map")

    class Output(BlockSchema):
        links: list[str] = SchemaField(description="The links of the website")

    def __init__(self):
        super().__init__(
            id="f0f43e2b-c943-48a0-a7f1-40136ca4d3b9",
            description="Firecrawl maps a website to extract all the links.",
            categories={BlockCategory.SEARCH},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

        # Sync call
        map_result = app.map_url(
            url=input_data.url,
        )
        yield "links", map_result.links

View File

@@ -0,0 +1,109 @@
from enum import Enum
from typing import Any

from firecrawl import FirecrawlApp

from backend.sdk import (
    APIKeyCredentials,
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchema,
    CredentialsMetaInput,
    SchemaField,
)

from ._config import firecrawl


class ScrapeFormat(Enum):
    MARKDOWN = "markdown"
    HTML = "html"
    RAW_HTML = "rawHtml"
    LINKS = "links"
    SCREENSHOT = "screenshot"
    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
    JSON = "json"
    CHANGE_TRACKING = "changeTracking"


class FirecrawlScrapeBlock(Block):
    class Input(BlockSchema):
        credentials: CredentialsMetaInput = firecrawl.credentials_field()
        url: str = SchemaField(description="The URL to crawl")
        limit: int = SchemaField(description="The number of pages to crawl", default=10)
        only_main_content: bool = SchemaField(
            description="Only return the main content of the page excluding headers, navs, footers, etc.",
            default=True,
        )
        max_age: int = SchemaField(
            description="The maximum age of the page in milliseconds - default is 1 hour",
            default=3600000,
        )
        wait_for: int = SchemaField(
            description="Specify a delay in milliseconds before fetching the content, allowing the page sufficient time to load.",
            default=200,
        )
        formats: list[ScrapeFormat] = SchemaField(
            description="The format of the crawl", default=[ScrapeFormat.MARKDOWN]
        )

    class Output(BlockSchema):
        data: dict[str, Any] = SchemaField(description="The result of the crawl")
        markdown: str = SchemaField(description="The markdown of the crawl")
        html: str = SchemaField(description="The html of the crawl")
        raw_html: str = SchemaField(description="The raw html of the crawl")
        links: list[str] = SchemaField(description="The links of the crawl")
        screenshot: str = SchemaField(description="The screenshot of the crawl")
        screenshot_full_page: str = SchemaField(
            description="The screenshot full page of the crawl"
        )
        json_data: dict[str, Any] = SchemaField(
            description="The json data of the crawl"
        )
        change_tracking: dict[str, Any] = SchemaField(
            description="The change tracking of the crawl"
        )

    def __init__(self):
        super().__init__(
            id="ac444320-cf5e-4697-b586-2604c17a3e75",
            description="Firecrawl scrapes a website to extract comprehensive data while bypassing blockers.",
            categories={BlockCategory.SEARCH},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

        scrape_result = app.scrape_url(
            input_data.url,
            formats=[format.value for format in input_data.formats],
            only_main_content=input_data.only_main_content,
            max_age=input_data.max_age,
            wait_for=input_data.wait_for,
        )
        yield "data", scrape_result

        for f in input_data.formats:
            if f == ScrapeFormat.MARKDOWN:
                yield "markdown", scrape_result.markdown
            elif f == ScrapeFormat.HTML:
                yield "html", scrape_result.html
            elif f == ScrapeFormat.RAW_HTML:
                yield "raw_html", scrape_result.rawHtml
            elif f == ScrapeFormat.LINKS:
                yield "links", scrape_result.links
            elif f == ScrapeFormat.SCREENSHOT:
                yield "screenshot", scrape_result.screenshot
            elif f == ScrapeFormat.SCREENSHOT_FULL_PAGE:
                yield "screenshot_full_page", scrape_result.screenshot
            elif f == ScrapeFormat.CHANGE_TRACKING:
                yield "change_tracking", scrape_result.changeTracking
            elif f == ScrapeFormat.JSON:
                yield "json", scrape_result.json
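
Structurally this mirrors FirecrawlCrawlBlock, but for a single URL: scrape_url returns one document (yielded whole as data) rather than a list to iterate over, and it takes snake_case keyword arguments directly, where crawl_url wraps the equivalent options in a camelCase ScrapeOptions object.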

View File

@@ -0,0 +1,79 @@
from enum import Enum
from typing import Any

from firecrawl import FirecrawlApp, ScrapeOptions

from backend.sdk import (
    APIKeyCredentials,
    Block,
    BlockCategory,
    BlockOutput,
    BlockSchema,
    CredentialsMetaInput,
    SchemaField,
)

from ._config import firecrawl


class ScrapeFormat(Enum):
    MARKDOWN = "markdown"
    HTML = "html"
    RAW_HTML = "rawHtml"
    LINKS = "links"
    SCREENSHOT = "screenshot"
    SCREENSHOT_FULL_PAGE = "screenshot@fullPage"
    JSON = "json"
    CHANGE_TRACKING = "changeTracking"


class FirecrawlSearchBlock(Block):
    class Input(BlockSchema):
        credentials: CredentialsMetaInput = firecrawl.credentials_field()
        query: str = SchemaField(description="The query to search for")
        limit: int = SchemaField(description="The number of pages to crawl", default=10)
        max_age: int = SchemaField(
            description="The maximum age of the page in milliseconds - default is 1 hour",
            default=3600000,
        )
        wait_for: int = SchemaField(
            description="Specify a delay in milliseconds before fetching the content, allowing the page sufficient time to load.",
            default=200,
        )
        formats: list[ScrapeFormat] = SchemaField(
            description="Returns the content of the search if specified", default=[]
        )

    class Output(BlockSchema):
        data: dict[str, Any] = SchemaField(description="The result of the search")
        site: dict[str, Any] = SchemaField(description="The site of the search")

    def __init__(self):
        super().__init__(
            id="f8d2f28d-b3a1-405b-804e-418c087d288b",
            description="Firecrawl searches the web for the given query.",
            categories={BlockCategory.SEARCH},
            input_schema=self.Input,
            output_schema=self.Output,
        )

    async def run(
        self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
    ) -> BlockOutput:
        app = FirecrawlApp(api_key=credentials.api_key.get_secret_value())

        # Sync call
        scrape_result = app.search(
            input_data.query,
            limit=input_data.limit,
            scrape_options=ScrapeOptions(
                formats=[format.value for format in input_data.formats],
                maxAge=input_data.max_age,
                waitFor=input_data.wait_for,
            ),
        )
        yield "data", scrape_result

        for site in scrape_result.data:
            yield "site", site
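
Unlike the crawl and scrape blocks, this one does not fan the requested formats out into separate outputs: the formats input only controls what scraped content is attached to each search result, and everything surfaces through the data output and the per-result site output yielded in the final loop.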

View File

@@ -1212,6 +1212,26 @@ files = [
 [package.dependencies]
 packaging = ">=20"
 
+[[package]]
+name = "firecrawl-py"
+version = "2.16.3"
+description = "Python SDK for Firecrawl API"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "firecrawl_py-2.16.3-py3-none-any.whl", hash = "sha256:94bb46af5e0df6c8ec414ac999a5355c0f5a46f15fd1cf5a02a3b31062db0aa8"},
+    {file = "firecrawl_py-2.16.3.tar.gz", hash = "sha256:5fd063ef4acc4c4be62648f1e11467336bc127780b3afc28d39078a012e6a14c"},
+]
+
+[package.dependencies]
+aiohttp = "*"
+nest-asyncio = "*"
+pydantic = "*"
+python-dotenv = "*"
+requests = "*"
+websockets = "*"
+
 [[package]]
 name = "flake8"
 version = "7.3.0"
@@ -2902,6 +2922,18 @@ files = [
     {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
 ]
 
+[[package]]
+name = "nest-asyncio"
+version = "1.6.0"
+description = "Patch asyncio to allow nested event loops"
+optional = false
+python-versions = ">=3.5"
+groups = ["main"]
+files = [
+    {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"},
+    {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"},
+]
+
 [[package]]
 name = "nodeenv"
 version = "1.9.1"
@@ -6686,4 +6718,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<3.13"
-content-hash = "e79fd4e1968b496b1012c4866130f5680fc6558d041ddddd53f466e8ec58869c"
+content-hash = "225ddae645d22cc57f46330e735c069fb52e708123aa642e74adbf077dda0796"

View File

@@ -74,6 +74,7 @@ aioclamd = "^1.0.0"
 setuptools = "^80.9.0"
 gcloud-aio-storage = "^9.5.0"
 pandas = "^2.3.1"
+firecrawl-py = "^2.16.3"
 
 [tool.poetry.group.dev.dependencies]
 aiohappyeyeballs = "^2.6.1"