mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-14 16:55:13 -05:00
Compare commits
1 Commits
feat/enric
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b8f5c208d0 |
@@ -17,6 +17,7 @@ from backend.blocks.jina._auth import (
|
|||||||
from backend.blocks.search import GetRequest
|
from backend.blocks.search import GetRequest
|
||||||
from backend.data.model import SchemaField
|
from backend.data.model import SchemaField
|
||||||
from backend.util.exceptions import BlockExecutionError
|
from backend.util.exceptions import BlockExecutionError
|
||||||
|
from backend.util.request import HTTPClientError, HTTPServerError, validate_url
|
||||||
|
|
||||||
|
|
||||||
class SearchTheWebBlock(Block, GetRequest):
|
class SearchTheWebBlock(Block, GetRequest):
|
||||||
@@ -110,7 +111,12 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
|
|||||||
self, input_data: Input, *, credentials: JinaCredentials, **kwargs
|
self, input_data: Input, *, credentials: JinaCredentials, **kwargs
|
||||||
) -> BlockOutput:
|
) -> BlockOutput:
|
||||||
if input_data.raw_content:
|
if input_data.raw_content:
|
||||||
url = input_data.url
|
try:
|
||||||
|
parsed_url, _, _ = await validate_url(input_data.url, [])
|
||||||
|
url = parsed_url.geturl()
|
||||||
|
except ValueError as e:
|
||||||
|
yield "error", f"Invalid URL: {e}"
|
||||||
|
return
|
||||||
headers = {}
|
headers = {}
|
||||||
else:
|
else:
|
||||||
url = f"https://r.jina.ai/{input_data.url}"
|
url = f"https://r.jina.ai/{input_data.url}"
|
||||||
@@ -119,5 +125,20 @@ class ExtractWebsiteContentBlock(Block, GetRequest):
|
|||||||
"Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
|
"Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
|
||||||
}
|
}
|
||||||
|
|
||||||
content = await self.get_request(url, json=False, headers=headers)
|
try:
|
||||||
|
content = await self.get_request(url, json=False, headers=headers)
|
||||||
|
except HTTPClientError as e:
|
||||||
|
yield "error", f"Client error ({e.status_code}) fetching {input_data.url}: {e}"
|
||||||
|
return
|
||||||
|
except HTTPServerError as e:
|
||||||
|
yield "error", f"Server error ({e.status_code}) fetching {input_data.url}: {e}"
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
yield "error", f"Failed to fetch {input_data.url}: {e}"
|
||||||
|
return
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
yield "error", f"No content returned for {input_data.url}"
|
||||||
|
return
|
||||||
|
|
||||||
yield "content", content
|
yield "content", content
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
from typing import cast
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.blocks.jina._auth import (
|
||||||
|
TEST_CREDENTIALS,
|
||||||
|
TEST_CREDENTIALS_INPUT,
|
||||||
|
JinaCredentialsInput,
|
||||||
|
)
|
||||||
|
from backend.blocks.jina.search import ExtractWebsiteContentBlock
|
||||||
|
from backend.util.request import HTTPClientError
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_extract_website_content_returns_content(monkeypatch):
|
||||||
|
block = ExtractWebsiteContentBlock()
|
||||||
|
input_data = block.Input(
|
||||||
|
url="https://example.com",
|
||||||
|
credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
|
||||||
|
raw_content=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def fake_get_request(url, json=False, headers=None):
|
||||||
|
assert url == "https://example.com"
|
||||||
|
assert headers == {}
|
||||||
|
return "page content"
|
||||||
|
|
||||||
|
monkeypatch.setattr(block, "get_request", fake_get_request)
|
||||||
|
|
||||||
|
results = [
|
||||||
|
output
|
||||||
|
async for output in block.run(
|
||||||
|
input_data=input_data, credentials=TEST_CREDENTIALS
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
assert ("content", "page content") in results
|
||||||
|
assert all(key != "error" for key, _ in results)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_extract_website_content_handles_http_error(monkeypatch):
|
||||||
|
block = ExtractWebsiteContentBlock()
|
||||||
|
input_data = block.Input(
|
||||||
|
url="https://example.com",
|
||||||
|
credentials=cast(JinaCredentialsInput, TEST_CREDENTIALS_INPUT),
|
||||||
|
raw_content=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def fake_get_request(url, json=False, headers=None):
|
||||||
|
raise HTTPClientError("HTTP 400 Error: Bad Request", 400)
|
||||||
|
|
||||||
|
monkeypatch.setattr(block, "get_request", fake_get_request)
|
||||||
|
|
||||||
|
results = [
|
||||||
|
output
|
||||||
|
async for output in block.run(
|
||||||
|
input_data=input_data, credentials=TEST_CREDENTIALS
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
assert ("content", "page content") not in results
|
||||||
|
error_messages = [value for key, value in results if key == "error"]
|
||||||
|
assert error_messages
|
||||||
|
assert "Client error (400)" in error_messages[0]
|
||||||
|
assert "https://example.com" in error_messages[0]
|
||||||
Reference in New Issue
Block a user