Files
AutoGPT/autogpt_platform/backend/test/blocks/test_youtube.py
lif 266e0d79d4 fix(blocks): add YouTube Shorts URL support (#11659)
## Summary
Added support for parsing YouTube Shorts URLs (`youtube.com/shorts/...`)
in the TranscribeYoutubeVideoBlock to extract video IDs correctly.

## Changes
- Modified the `extract_video_id` method in `youtube.py` to handle the
Shorts URL format (`youtube.com/shorts/<video_id>`)
- Added test cases for YouTube Shorts URL extraction

## Related Issue
Fixes #11500

## Test Plan
- [x] Added unit tests for YouTube Shorts URL extraction
- [x] Verified existing YouTube URL formats still work
- [x] CI should pass all existing tests

---------

Co-authored-by: Ubbe <hi@ubbe.dev>
2026-01-05 16:11:45 +00:00

192 lines
8.1 KiB
Python

from unittest.mock import Mock, patch
import pytest
from pydantic import SecretStr
from youtube_transcript_api._errors import NoTranscriptFound
from youtube_transcript_api._transcripts import FetchedTranscript, Transcript
from youtube_transcript_api.proxies import WebshareProxyConfig
from backend.blocks.youtube import TEST_CREDENTIALS, TranscribeYoutubeVideoBlock
from backend.data.model import UserPasswordCredentials
from backend.integrations.providers import ProviderName
class TestTranscribeYoutubeVideoBlock:
    """Test cases for TranscribeYoutubeVideoBlock.

    Covers three areas:

    * ``extract_video_id`` for the watch, youtu.be, embed and Shorts URL forms;
    * ``get_transcript`` proxy configuration (default and custom credentials);
    * ``get_transcript`` language fallback when no English transcript exists.
    """

    def setup_method(self):
        """Create a fresh block and default credentials before each test."""
        self.youtube_block = TranscribeYoutubeVideoBlock()
        self.credentials = TEST_CREDENTIALS

    # ------------------------------------------------------------------
    # extract_video_id
    # ------------------------------------------------------------------

    def test_extract_video_id_standard_url(self):
        """Test extracting video ID from standard YouTube URL."""
        url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        video_id = self.youtube_block.extract_video_id(url)
        assert video_id == "dQw4w9WgXcQ"

    def test_extract_video_id_short_url(self):
        """Test extracting video ID from shortened youtu.be URL."""
        url = "https://youtu.be/dQw4w9WgXcQ"
        video_id = self.youtube_block.extract_video_id(url)
        assert video_id == "dQw4w9WgXcQ"

    def test_extract_video_id_embed_url(self):
        """Test extracting video ID from embed URL."""
        url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
        video_id = self.youtube_block.extract_video_id(url)
        assert video_id == "dQw4w9WgXcQ"

    def test_extract_video_id_shorts_url(self):
        """Test extracting video ID from YouTube Shorts URL."""
        url = "https://www.youtube.com/shorts/dtUqwMu3e-g"
        video_id = self.youtube_block.extract_video_id(url)
        assert video_id == "dtUqwMu3e-g"

    def test_extract_video_id_shorts_url_with_params(self):
        """Test extracting video ID from YouTube Shorts URL with query parameters."""
        # The trailing query string must not leak into the extracted ID.
        url = "https://www.youtube.com/shorts/dtUqwMu3e-g?feature=share"
        video_id = self.youtube_block.extract_video_id(url)
        assert video_id == "dtUqwMu3e-g"

    # ------------------------------------------------------------------
    # get_transcript: direct fetch and proxy configuration
    # ------------------------------------------------------------------

    @patch("backend.blocks.youtube.YouTubeTranscriptApi")
    def test_get_transcript_english_available(self, mock_api_class):
        """Test getting transcript when English is available."""
        # Setup mock
        mock_api = Mock()
        mock_api_class.return_value = mock_api
        mock_transcript = Mock(spec=FetchedTranscript)
        mock_api.fetch.return_value = mock_transcript

        # Execute
        result = self.youtube_block.get_transcript("test_video_id", self.credentials)

        # Assert
        assert result == mock_transcript
        mock_api_class.assert_called_once()
        # The API client must be constructed with a Webshare proxy config.
        proxy_config = mock_api_class.call_args.kwargs["proxy_config"]
        assert isinstance(proxy_config, WebshareProxyConfig)
        mock_api.fetch.assert_called_once_with(video_id="test_video_id")
        # A successful fetch must not trigger the fallback listing.
        mock_api.list.assert_not_called()

    @patch("backend.blocks.youtube.YouTubeTranscriptApi")
    def test_get_transcript_with_custom_credentials(self, mock_api_class):
        """Test getting transcript with custom proxy credentials."""
        # Setup mock
        mock_api = Mock()
        mock_api_class.return_value = mock_api
        mock_transcript = Mock(spec=FetchedTranscript)
        mock_api.fetch.return_value = mock_transcript

        credentials = UserPasswordCredentials(
            provider=ProviderName.WEBSHARE_PROXY,
            username=SecretStr("custom_user"),
            password=SecretStr("custom_pass"),
        )

        # Execute
        result = self.youtube_block.get_transcript("test_video_id", credentials)

        # Assert
        assert result == mock_transcript
        mock_api_class.assert_called_once()
        # The supplied username/password must be forwarded to the proxy config.
        proxy_config = mock_api_class.call_args.kwargs["proxy_config"]
        assert isinstance(proxy_config, WebshareProxyConfig)
        assert proxy_config.proxy_username == "custom_user"
        assert proxy_config.proxy_password == "custom_pass"
        mock_api.fetch.assert_called_once_with(video_id="test_video_id")
        mock_api.list.assert_not_called()

    # ------------------------------------------------------------------
    # get_transcript: language fallback
    # ------------------------------------------------------------------

    @patch("backend.blocks.youtube.YouTubeTranscriptApi")
    def test_get_transcript_fallback_to_first_available(self, mock_api_class):
        """Test fallback to first available language when English is not available."""
        # Setup mock
        mock_api = Mock()
        mock_api_class.return_value = mock_api

        # Create mock transcript list with Hungarian transcript
        mock_transcript_list = Mock()
        mock_transcript_hu = Mock(spec=Transcript)
        mock_fetched_transcript = Mock(spec=FetchedTranscript)
        mock_transcript_hu.fetch.return_value = mock_fetched_transcript

        # Set up the transcript list to have manually created transcripts empty
        # and generated transcripts with Hungarian
        mock_transcript_list._manually_created_transcripts = {}
        mock_transcript_list._generated_transcripts = {"hu": mock_transcript_hu}

        # Mock API to raise NoTranscriptFound for English, then return list
        mock_api.fetch.side_effect = NoTranscriptFound(
            "test_video_id", ("en",), mock_transcript_list
        )
        mock_api.list.return_value = mock_transcript_list

        # Execute
        result = self.youtube_block.get_transcript("test_video_id", self.credentials)

        # Assert
        assert result == mock_fetched_transcript
        mock_api_class.assert_called_once()
        mock_api.fetch.assert_called_once_with(video_id="test_video_id")
        mock_api.list.assert_called_once_with("test_video_id")
        mock_transcript_hu.fetch.assert_called_once()

    @patch("backend.blocks.youtube.YouTubeTranscriptApi")
    def test_get_transcript_prefers_manually_created(self, mock_api_class):
        """Test that manually created transcripts are preferred over generated ones."""
        # Setup mock
        mock_api = Mock()
        mock_api_class.return_value = mock_api

        # Create mock transcript list with both manual and generated transcripts
        mock_transcript_list = Mock()
        mock_transcript_manual = Mock(spec=Transcript)
        mock_transcript_generated = Mock(spec=Transcript)
        mock_fetched_manual = Mock(spec=FetchedTranscript)
        mock_transcript_manual.fetch.return_value = mock_fetched_manual

        # Set up the transcript list
        mock_transcript_list._manually_created_transcripts = {
            "es": mock_transcript_manual
        }
        mock_transcript_list._generated_transcripts = {"hu": mock_transcript_generated}

        # Mock API to raise NoTranscriptFound for English
        mock_api.fetch.side_effect = NoTranscriptFound(
            "test_video_id", ("en",), mock_transcript_list
        )
        mock_api.list.return_value = mock_transcript_list

        # Execute
        result = self.youtube_block.get_transcript("test_video_id", self.credentials)

        # Assert - should use manually created transcript first
        assert result == mock_fetched_manual
        mock_api_class.assert_called_once()
        mock_transcript_manual.fetch.assert_called_once()
        mock_transcript_generated.fetch.assert_not_called()

    @patch("backend.blocks.youtube.YouTubeTranscriptApi")
    def test_get_transcript_no_transcripts_available(self, mock_api_class):
        """Test that exception is re-raised when no transcripts are available at all."""
        # Setup mock
        mock_api = Mock()
        mock_api_class.return_value = mock_api

        # Create mock transcript list with no transcripts
        mock_transcript_list = Mock()
        mock_transcript_list._manually_created_transcripts = {}
        mock_transcript_list._generated_transcripts = {}

        # Mock API to raise NoTranscriptFound
        original_exception = NoTranscriptFound(
            "test_video_id", ("en",), mock_transcript_list
        )
        mock_api.fetch.side_effect = original_exception
        mock_api.list.return_value = mock_transcript_list

        # Execute and assert exception is raised
        with pytest.raises(NoTranscriptFound):
            self.youtube_block.get_transcript("test_video_id", self.credentials)

        mock_api_class.assert_called_once()