mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(blocks): Add web scraper block to autogpt_server
- Import the web_scraper module in __init__.py
- Create a new file web_scraper.py with the WebScraper block
- Implement the run method to scrape content from a given URL
- Handle HTTP errors and exceptions in the run method
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from autogpt_server.blocks import agent, sample, reddit, text, ai, wikipedia, discord
|
||||
from autogpt_server.blocks import agent, sample, reddit, text, ai, wikipedia, discord, web_scraper
|
||||
from autogpt_server.data.block import Block
|
||||
|
||||
AVAILABLE_BLOCKS = {
|
||||
@@ -6,4 +6,4 @@ AVAILABLE_BLOCKS = {
|
||||
for block in [v() for v in Block.__subclasses__()]
|
||||
}
|
||||
|
||||
__all__ = ["agent", "ai", "sample", "reddit", "text", "AVAILABLE_BLOCKS", "wikipedia", "discord"]
|
||||
__all__ = ["agent", "ai", "sample", "reddit", "text", "AVAILABLE_BLOCKS", "wikipedia", "discord", "web_scraper"]
|
||||
35
rnd/autogpt_server/autogpt_server/blocks/web_scraper.py
Normal file
35
rnd/autogpt_server/autogpt_server/blocks/web_scraper.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import requests
|
||||
from autogpt_server.data.block import Block, BlockSchema, BlockOutput
|
||||
|
||||
class WebScraper(Block):
    """Block that retrieves the content of a web page through the Jina-ai Reader.

    The input URL is appended to ``https://r.jina.ai/`` and the text of the
    HTTP response is emitted on the ``content`` output.
    """

    class Input(BlockSchema):
        url: str  # The URL to scrape

    class Output(BlockSchema):
        content: str  # The scraped content from the URL

    def __init__(self):
        """Register the block's ID, input/output schemas and test fixtures."""
        super().__init__(
            # NOTE(review): this ID contains non-hexadecimal characters, so it
            # is not a valid UUID. It is kept unchanged because it is the
            # block's external identity -- confirm whether IDs are validated.
            id="a1b2c3d4-5e6f-7g8h-9i0j-k1l2m3n4o5p6",  # Unique ID for the block
            input_schema=WebScraper.Input,
            output_schema=WebScraper.Output,
            test_input={"url": "https://en.wikipedia.org/wiki/Artificial_intelligence"},
            test_output={"content": "Artificial intelligence (AI) is intelligence..."},
        )

    def run(self, input_data: Input) -> BlockOutput:
        """Fetch ``input_data.url`` via the Jina-ai Reader and yield its text.

        Args:
            input_data: Block input carrying the ``url`` to scrape.

        Yields:
            A ``("content", text)`` pair holding the response body as text.

        Raises:
            ValueError: If the Reader responds with an HTTP error status, or
                the request fails for any other reason (connection error,
                timeout, ...). The underlying ``requests`` exception is
                attached as ``__cause__``.
        """
        # Prepend the Jina-ai Reader URL to the input URL
        jina_url = f"https://r.jina.ai/{input_data.url}"

        # Without an explicit timeout ``requests`` waits indefinitely, so a
        # stalled upstream would hang the block forever.
        timeout_seconds = 30

        # Keep the ``try`` limited to the network call so that the handlers
        # below only ever translate errors raised by the request itself.
        try:
            # Make the request to Jina-ai Reader
            response = requests.get(jina_url, timeout=timeout_seconds)
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            raise ValueError(f"HTTP error occurred: {http_err}") from http_err
        except requests.RequestException as e:
            raise ValueError(f"Request to Jina-ai Reader failed: {e}") from e

        # Output the scraped content
        yield "content", response.text
|
||||
Reference in New Issue
Block a user