mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-09 15:17:59 -05:00
fix(backend): Fix Youtube blocking our cloud ips (#11456)
YouTube can block cloud IPs, causing the YouTube transcribe blocks to
not work. This PR adds a Webshare proxy to get around this issue.
### Changes 🏗️
- Add a Webshare proxy to the YouTube transcribe block
### Checklist 📋
#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
<!-- Put your test plan here: -->
- [x] I have tested this works locally using the proxy
<!-- CURSOR_SUMMARY -->
---
> [!NOTE]
> Routes YouTube transcript fetching through Webshare proxy using
user/password credentials, wiring in provider enum, settings, default
credentials, and updated tests.
>
> - **Blocks** (`backend/blocks/youtube.py`):
> - Use `WebshareProxyConfig` with `YouTubeTranscriptApi` to fetch
transcripts via proxy.
> - Add `credentials` input (`user_password` for `webshare_proxy`);
include test credentials and mocks.
> - Update method signatures: `get_transcript(video_id, credentials)`
and `run(..., *, credentials, ...)`.
> - Change description to indicate proxy usage; add logging.
> - **Integrations**:
> - Providers (`backend/integrations/providers.py`): add
`ProviderName.WEBSHARE_PROXY`.
> - Credentials store (`backend/integrations/credentials_store.py`): add
`webshare_proxy` `UserPasswordCredentials`; include in
`DEFAULT_CREDENTIALS` and conditionally in `get_all_creds`.
> - **Settings** (`backend/util/settings.py`): add secrets
`webshare_proxy_username` and `webshare_proxy_password`.
> - **Tests** (`test/blocks/test_youtube.py`): update to pass
credentials and assert proxy config; add custom-credentials test; adjust
fallback/priority tests.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
d060898488. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co>
This commit is contained in:
@@ -1,9 +1,13 @@
|
||||
import logging
|
||||
from typing import Literal
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from pydantic import SecretStr
|
||||
from youtube_transcript_api._api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api._errors import NoTranscriptFound
|
||||
from youtube_transcript_api._transcripts import FetchedTranscript
|
||||
from youtube_transcript_api.formatters import TextFormatter
|
||||
from youtube_transcript_api.proxies import WebshareProxyConfig
|
||||
|
||||
from backend.data.block import (
|
||||
Block,
|
||||
@@ -12,7 +16,42 @@ from backend.data.block import (
|
||||
BlockSchemaInput,
|
||||
BlockSchemaOutput,
|
||||
)
|
||||
from backend.data.model import SchemaField
|
||||
from backend.data.model import (
|
||||
CredentialsField,
|
||||
CredentialsMetaInput,
|
||||
SchemaField,
|
||||
UserPasswordCredentials,
|
||||
)
|
||||
from backend.integrations.providers import ProviderName
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TEST_CREDENTIALS = UserPasswordCredentials(
|
||||
id="01234567-89ab-cdef-0123-456789abcdef",
|
||||
provider="webshare_proxy",
|
||||
username=SecretStr("mock-webshare-username"),
|
||||
password=SecretStr("mock-webshare-password"),
|
||||
title="Mock Webshare Proxy credentials",
|
||||
)
|
||||
|
||||
TEST_CREDENTIALS_INPUT = {
|
||||
"provider": TEST_CREDENTIALS.provider,
|
||||
"id": TEST_CREDENTIALS.id,
|
||||
"type": TEST_CREDENTIALS.type,
|
||||
"title": TEST_CREDENTIALS.title,
|
||||
}
|
||||
|
||||
WebshareProxyCredentials = UserPasswordCredentials
|
||||
WebshareProxyCredentialsInput = CredentialsMetaInput[
|
||||
Literal[ProviderName.WEBSHARE_PROXY],
|
||||
Literal["user_password"],
|
||||
]
|
||||
|
||||
|
||||
def WebshareProxyCredentialsField() -> WebshareProxyCredentialsInput:
|
||||
return CredentialsField(
|
||||
description="Webshare proxy credentials for fetching YouTube transcripts",
|
||||
)
|
||||
|
||||
|
||||
class TranscribeYoutubeVideoBlock(Block):
|
||||
@@ -22,6 +61,7 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
description="The URL of the YouTube video to transcribe",
|
||||
placeholder="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||
)
|
||||
credentials: WebshareProxyCredentialsInput = WebshareProxyCredentialsField()
|
||||
|
||||
class Output(BlockSchemaOutput):
|
||||
video_id: str = SchemaField(description="The extracted YouTube video ID")
|
||||
@@ -35,9 +75,12 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
id="f3a8f7e1-4b1d-4e5f-9f2a-7c3d5a2e6b4c",
|
||||
input_schema=TranscribeYoutubeVideoBlock.Input,
|
||||
output_schema=TranscribeYoutubeVideoBlock.Output,
|
||||
description="Transcribes a YouTube video.",
|
||||
description="Transcribes a YouTube video using a proxy.",
|
||||
categories={BlockCategory.SOCIAL},
|
||||
test_input={"youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"},
|
||||
test_input={
|
||||
"youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||
"credentials": TEST_CREDENTIALS_INPUT,
|
||||
},
|
||||
test_output=[
|
||||
("video_id", "dQw4w9WgXcQ"),
|
||||
(
|
||||
@@ -45,8 +88,9 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
"Never gonna give you up\nNever gonna let you down",
|
||||
),
|
||||
],
|
||||
test_credentials=TEST_CREDENTIALS,
|
||||
test_mock={
|
||||
"get_transcript": lambda video_id: [
|
||||
"get_transcript": lambda video_id, credentials: [
|
||||
{"text": "Never gonna give you up"},
|
||||
{"text": "Never gonna let you down"},
|
||||
],
|
||||
@@ -69,16 +113,27 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
return parsed_url.path.split("/")[2]
|
||||
raise ValueError(f"Invalid YouTube URL: {url}")
|
||||
|
||||
@staticmethod
|
||||
def get_transcript(video_id: str) -> FetchedTranscript:
|
||||
def get_transcript(
|
||||
self, video_id: str, credentials: WebshareProxyCredentials
|
||||
) -> FetchedTranscript:
|
||||
"""
|
||||
Get transcript for a video, preferring English but falling back to any available language.
|
||||
|
||||
:param video_id: The YouTube video ID
|
||||
:param credentials: The Webshare proxy credentials
|
||||
:return: The fetched transcript
|
||||
:raises: Any exception except NoTranscriptFound for requested languages
|
||||
"""
|
||||
api = YouTubeTranscriptApi()
|
||||
logger.warning(
|
||||
"Using Webshare proxy for YouTube transcript fetch (video_id=%s)",
|
||||
video_id,
|
||||
)
|
||||
proxy_config = WebshareProxyConfig(
|
||||
proxy_username=credentials.username.get_secret_value(),
|
||||
proxy_password=credentials.password.get_secret_value(),
|
||||
)
|
||||
|
||||
api = YouTubeTranscriptApi(proxy_config=proxy_config)
|
||||
try:
|
||||
# Try to get English transcript first (default behavior)
|
||||
return api.fetch(video_id=video_id)
|
||||
@@ -101,11 +156,17 @@ class TranscribeYoutubeVideoBlock(Block):
|
||||
transcript_text = formatter.format_transcript(transcript)
|
||||
return transcript_text
|
||||
|
||||
async def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
async def run(
|
||||
self,
|
||||
input_data: Input,
|
||||
*,
|
||||
credentials: WebshareProxyCredentials,
|
||||
**kwargs,
|
||||
) -> BlockOutput:
|
||||
video_id = self.extract_video_id(input_data.youtube_url)
|
||||
yield "video_id", video_id
|
||||
|
||||
transcript = self.get_transcript(video_id)
|
||||
transcript = self.get_transcript(video_id, credentials)
|
||||
transcript_text = self.format_transcript(transcript=transcript)
|
||||
|
||||
yield "transcript", transcript_text
|
||||
|
||||
@@ -15,6 +15,7 @@ from backend.data.model import (
|
||||
OAuth2Credentials,
|
||||
OAuthState,
|
||||
UserIntegrations,
|
||||
UserPasswordCredentials,
|
||||
)
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.util.settings import Settings
|
||||
@@ -207,6 +208,14 @@ v0_credentials = APIKeyCredentials(
|
||||
expires_at=None,
|
||||
)
|
||||
|
||||
webshare_proxy_credentials = UserPasswordCredentials(
|
||||
id="a5b3c7d9-2e4f-4a6b-8c1d-9e0f1a2b3c4d",
|
||||
provider="webshare_proxy",
|
||||
username=SecretStr(settings.secrets.webshare_proxy_username),
|
||||
password=SecretStr(settings.secrets.webshare_proxy_password),
|
||||
title="Use Credits for Webshare Proxy",
|
||||
)
|
||||
|
||||
DEFAULT_CREDENTIALS = [
|
||||
ollama_credentials,
|
||||
revid_credentials,
|
||||
@@ -233,6 +242,7 @@ DEFAULT_CREDENTIALS = [
|
||||
google_maps_credentials,
|
||||
llama_api_credentials,
|
||||
v0_credentials,
|
||||
webshare_proxy_credentials,
|
||||
]
|
||||
|
||||
|
||||
@@ -321,6 +331,11 @@ class IntegrationCredentialsStore:
|
||||
all_credentials.append(zerobounce_credentials)
|
||||
if settings.secrets.google_maps_api_key:
|
||||
all_credentials.append(google_maps_credentials)
|
||||
if (
|
||||
settings.secrets.webshare_proxy_username
|
||||
and settings.secrets.webshare_proxy_password
|
||||
):
|
||||
all_credentials.append(webshare_proxy_credentials)
|
||||
return all_credentials
|
||||
|
||||
async def get_creds_by_id(
|
||||
|
||||
@@ -49,6 +49,7 @@ class ProviderName(str, Enum):
|
||||
TODOIST = "todoist"
|
||||
UNREAL_SPEECH = "unreal_speech"
|
||||
V0 = "v0"
|
||||
WEBSHARE_PROXY = "webshare_proxy"
|
||||
ZEROBOUNCE = "zerobounce"
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -571,6 +571,12 @@ class Secrets(UpdateTrackingModel["Secrets"], BaseSettings):
|
||||
open_router_api_key: str = Field(default="", description="Open Router API Key")
|
||||
llama_api_key: str = Field(default="", description="Llama API Key")
|
||||
v0_api_key: str = Field(default="", description="v0 by Vercel API key")
|
||||
webshare_proxy_username: str = Field(
|
||||
default="", description="Webshare Proxy Username"
|
||||
)
|
||||
webshare_proxy_password: str = Field(
|
||||
default="", description="Webshare Proxy Password"
|
||||
)
|
||||
|
||||
reddit_client_id: str = Field(default="", description="Reddit client ID")
|
||||
reddit_client_secret: str = Field(default="", description="Reddit client secret")
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import SecretStr
|
||||
from youtube_transcript_api._errors import NoTranscriptFound
|
||||
from youtube_transcript_api._transcripts import FetchedTranscript, Transcript
|
||||
from youtube_transcript_api.proxies import WebshareProxyConfig
|
||||
|
||||
from backend.blocks.youtube import TranscribeYoutubeVideoBlock
|
||||
from backend.blocks.youtube import TEST_CREDENTIALS, TranscribeYoutubeVideoBlock
|
||||
from backend.data.model import UserPasswordCredentials
|
||||
from backend.integrations.providers import ProviderName
|
||||
|
||||
|
||||
class TestTranscribeYoutubeVideoBlock:
|
||||
@@ -13,6 +17,7 @@ class TestTranscribeYoutubeVideoBlock:
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.youtube_block = TranscribeYoutubeVideoBlock()
|
||||
self.credentials = TEST_CREDENTIALS
|
||||
|
||||
def test_extract_video_id_standard_url(self):
|
||||
"""Test extracting video ID from standard YouTube URL."""
|
||||
@@ -42,10 +47,41 @@ class TestTranscribeYoutubeVideoBlock:
|
||||
mock_api.fetch.return_value = mock_transcript
|
||||
|
||||
# Execute
|
||||
result = TranscribeYoutubeVideoBlock.get_transcript("test_video_id")
|
||||
result = self.youtube_block.get_transcript("test_video_id", self.credentials)
|
||||
|
||||
# Assert
|
||||
assert result == mock_transcript
|
||||
mock_api_class.assert_called_once()
|
||||
proxy_config = mock_api_class.call_args[1]["proxy_config"]
|
||||
assert isinstance(proxy_config, WebshareProxyConfig)
|
||||
mock_api.fetch.assert_called_once_with(video_id="test_video_id")
|
||||
mock_api.list.assert_not_called()
|
||||
|
||||
@patch("backend.blocks.youtube.YouTubeTranscriptApi")
|
||||
def test_get_transcript_with_custom_credentials(self, mock_api_class):
|
||||
"""Test getting transcript with custom proxy credentials."""
|
||||
# Setup mock
|
||||
mock_api = Mock()
|
||||
mock_api_class.return_value = mock_api
|
||||
mock_transcript = Mock(spec=FetchedTranscript)
|
||||
mock_api.fetch.return_value = mock_transcript
|
||||
|
||||
credentials = UserPasswordCredentials(
|
||||
provider=ProviderName.WEBSHARE_PROXY,
|
||||
username=SecretStr("custom_user"),
|
||||
password=SecretStr("custom_pass"),
|
||||
)
|
||||
|
||||
# Execute
|
||||
result = self.youtube_block.get_transcript("test_video_id", credentials)
|
||||
|
||||
# Assert
|
||||
assert result == mock_transcript
|
||||
mock_api_class.assert_called_once()
|
||||
proxy_config = mock_api_class.call_args[1]["proxy_config"]
|
||||
assert isinstance(proxy_config, WebshareProxyConfig)
|
||||
assert proxy_config.proxy_username == "custom_user"
|
||||
assert proxy_config.proxy_password == "custom_pass"
|
||||
mock_api.fetch.assert_called_once_with(video_id="test_video_id")
|
||||
mock_api.list.assert_not_called()
|
||||
|
||||
@@ -74,10 +110,11 @@ class TestTranscribeYoutubeVideoBlock:
|
||||
mock_api.list.return_value = mock_transcript_list
|
||||
|
||||
# Execute
|
||||
result = TranscribeYoutubeVideoBlock.get_transcript("test_video_id")
|
||||
result = self.youtube_block.get_transcript("test_video_id", self.credentials)
|
||||
|
||||
# Assert
|
||||
assert result == mock_fetched_transcript
|
||||
mock_api_class.assert_called_once()
|
||||
mock_api.fetch.assert_called_once_with(video_id="test_video_id")
|
||||
mock_api.list.assert_called_once_with("test_video_id")
|
||||
mock_transcript_hu.fetch.assert_called_once()
|
||||
@@ -109,10 +146,11 @@ class TestTranscribeYoutubeVideoBlock:
|
||||
mock_api.list.return_value = mock_transcript_list
|
||||
|
||||
# Execute
|
||||
result = TranscribeYoutubeVideoBlock.get_transcript("test_video_id")
|
||||
result = self.youtube_block.get_transcript("test_video_id", self.credentials)
|
||||
|
||||
# Assert - should use manually created transcript first
|
||||
assert result == mock_fetched_manual
|
||||
mock_api_class.assert_called_once()
|
||||
mock_transcript_manual.fetch.assert_called_once()
|
||||
mock_transcript_generated.fetch.assert_not_called()
|
||||
|
||||
@@ -137,4 +175,5 @@ class TestTranscribeYoutubeVideoBlock:
|
||||
|
||||
# Execute and assert exception is raised
|
||||
with pytest.raises(NoTranscriptFound):
|
||||
TranscribeYoutubeVideoBlock.get_transcript("test_video_id")
|
||||
self.youtube_block.get_transcript("test_video_id", self.credentials)
|
||||
mock_api_class.assert_called_once()
|
||||
|
||||
Reference in New Issue
Block a user