mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
* Fix SSRF via user-controlled ollama_host field

  Validate ollama_host against BLOCKED_IP_NETWORKS before passing to ollama.AsyncClient(). The server-configured default (env: OLLAMA_HOST) is allowed without validation; user-supplied values that differ are checked for private/internal IP resolution.

  Fixes GHSA-6jx2-4h7q-3fx3
* Generalize validate_ollama_host to validate_host; fix description line length
* Rename to validate_untrusted_host with whitelist parameter
* Apply PR suggestion: include whitelist in error message; run formatting
* Move whitelist check after URL normalization; match on netloc
* revert unrelated formatting changes
* Dedup validate_url and validate_untrusted_host; normalize whitelist
* Move _resolve_and_check_blocked after calling functions
* dedup and clean up
* make trusted_hostnames truly optional

---------

Co-authored-by: Reinier van der Leer <pwuts@agpt.co>
145 lines
5.2 KiB
Python
145 lines
5.2 KiB
Python
from urllib.parse import quote
|
|
|
|
from backend.blocks._base import (
|
|
Block,
|
|
BlockCategory,
|
|
BlockOutput,
|
|
BlockSchemaInput,
|
|
BlockSchemaOutput,
|
|
)
|
|
from backend.blocks.jina._auth import (
|
|
TEST_CREDENTIALS,
|
|
TEST_CREDENTIALS_INPUT,
|
|
JinaCredentials,
|
|
JinaCredentialsField,
|
|
JinaCredentialsInput,
|
|
)
|
|
from backend.blocks.search import GetRequest
|
|
from backend.data.model import SchemaField
|
|
from backend.util.exceptions import BlockExecutionError
|
|
from backend.util.request import HTTPClientError, HTTPServerError, validate_url_host
|
|
|
|
|
|
class SearchTheWebBlock(Block, GetRequest):
    """Block that forwards a search query to the Jina Search endpoint.

    The query is percent-encoded, appended to ``https://s.jina.ai/`` and
    fetched through ``self.get_request`` (presumably supplied by the
    ``GetRequest`` base; it is the name replaced by ``test_mock``).
    """

    # Input schema: Jina credentials plus the text to search for.
    class Input(BlockSchemaInput):
        credentials: JinaCredentialsInput = JinaCredentialsField()
        query: str = SchemaField(description="The search query to search the web for")

    # Output schema: the body returned by the search request.
    class Output(BlockSchemaOutput):
        results: str = SchemaField(
            description="The search results including content from top 5 URLs"
        )

    def __init__(self):
        """Register the block id, schemas and test fixtures with the base class."""
        sample_input = {
            "credentials": TEST_CREDENTIALS_INPUT,
            "query": "Artificial Intelligence",
        }
        super().__init__(
            id="87840993-2053-44b7-8da4-187ad4ee518c",
            description="This block searches the internet for the given search query.",
            categories={BlockCategory.SEARCH},
            input_schema=SearchTheWebBlock.Input,
            output_schema=SearchTheWebBlock.Output,
            test_input=sample_input,
            test_credentials=TEST_CREDENTIALS,
            test_output=("results", "search content"),
            # Tests swap the network call for a canned response.
            test_mock={"get_request": lambda *args, **kwargs: "search content"},
        )

    async def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Run the search and yield its response.

        Args:
            input_data: Block input carrying the ``query`` string.
            credentials: Jina credentials; the API key is sent as a bearer token.
            **kwargs: Accepted and ignored by this method.

        Yields:
            ``("results", <response>)`` where ``<response>`` is whatever
            ``get_request`` returned.

        Raises:
            BlockExecutionError: Wraps any exception raised by the request.
        """
        # The encoded query becomes the path of the Jina Search URL.
        search_url = f"https://s.jina.ai/{quote(input_data.query)}"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
        }

        try:
            results = await self.get_request(
                search_url, headers=request_headers, json=False
            )
        except Exception as e:
            # Surface every failure as a block-level error, keeping the cause chained.
            raise BlockExecutionError(
                message=f"Search failed: {e}",
                block_name=self.name,
                block_id=self.id,
            ) from e

        yield "results", results
|
|
|
|
|
|
class ExtractWebsiteContentBlock(Block, GetRequest):
    """Block that fetches the content of a URL, directly or via Jina Reader.

    With ``raw_content`` set, the URL is checked with ``validate_url_host``
    and fetched as-is with no headers. Otherwise the URL is appended to
    ``https://r.jina.ai/`` and fetched with the Jina API key as a bearer token.
    """

    # Input schema: Jina credentials, the target URL and the scrape mode flag.
    class Input(BlockSchemaInput):
        credentials: JinaCredentialsInput = JinaCredentialsField()
        url: str = SchemaField(description="The URL to scrape the content from")
        raw_content: bool = SchemaField(
            default=False,
            title="Raw Content",
            description="Whether to do a raw scrape of the content or use Jina-ai Reader to scrape the content",
            advanced=True,
        )

    # Output schema: the fetched content, or an error message on failure.
    class Output(BlockSchemaOutput):
        content: str = SchemaField(description="The scraped content from the given URL")
        error: str = SchemaField(
            description="Error message if the content cannot be retrieved"
        )

    def __init__(self):
        """Register the block id, schemas and test fixtures with the base class."""
        sample_input = {
            "url": "https://en.wikipedia.org/wiki/Artificial_intelligence",
            "credentials": TEST_CREDENTIALS_INPUT,
        }
        super().__init__(
            id="436c3984-57fd-4b85-8e9a-459b356883bd",
            description="This block scrapes the content from the given web URL.",
            categories={BlockCategory.SEARCH},
            input_schema=ExtractWebsiteContentBlock.Input,
            output_schema=ExtractWebsiteContentBlock.Output,
            test_input=sample_input,
            test_credentials=TEST_CREDENTIALS,
            test_output=("content", "scraped content"),
            # Tests swap the network call for a canned response.
            test_mock={"get_request": lambda *args, **kwargs: "scraped content"},
        )

    async def run(
        self, input_data: Input, *, credentials: JinaCredentials, **kwargs
    ) -> BlockOutput:
        """Fetch the requested URL and yield its content or an error.

        Args:
            input_data: Block input carrying ``url`` and ``raw_content``.
            credentials: Jina credentials; only used when ``raw_content`` is false.
            **kwargs: Accepted and ignored by this method.

        Yields:
            ``("content", <response>)`` on success, otherwise a single
            ``("error", <message>)`` describing why nothing was retrieved.
        """
        if not input_data.raw_content:
            # Reader mode: let Jina fetch the page on our behalf.
            request_url = f"https://r.jina.ai/{input_data.url}"
            request_headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {credentials.api_key.get_secret_value()}",
            }
        else:
            # Raw mode: the user-supplied URL is requested directly, so it is
            # validated first and the normalized form is what gets fetched.
            try:
                parsed_url, _, _ = await validate_url_host(input_data.url)
                request_url = parsed_url.geturl()
            except ValueError as e:
                yield "error", f"Invalid URL: {e}"
                return
            request_headers = {}

        # Collect at most one failure message, then report it from a single place.
        failure = None
        content = None
        try:
            content = await self.get_request(
                request_url, json=False, headers=request_headers
            )
        except HTTPClientError as e:
            failure = f"Client error ({e.status_code}) fetching {input_data.url}: {e}"
        except HTTPServerError as e:
            failure = f"Server error ({e.status_code}) fetching {input_data.url}: {e}"
        except Exception as e:
            failure = f"Failed to fetch {input_data.url}: {e}"

        # A successful request with a falsy body is still reported as an error.
        if failure is None and not content:
            failure = f"No content returned for {input_data.url}"

        if failure is not None:
            yield "error", failure
            return

        yield "content", content
|