Compare commits

...

2 Commits

Author SHA1 Message Date
Toran Bruce Richards
8ce231f670 Merge branch 'master' into toran/open-1475-add-webscraper-block 2024-07-14 19:43:28 +01:00
Toran Bruce Richards
e0387bd857 feat(blocks): Add web scraper block to autogpt_server
- Import the web_scraper module in __init__.py
- Create a new file web_scraper.py with the WebScraper block
- Implement the run method to scrape content from a given URL
- Handle HTTP errors and exceptions in the run method
2024-07-14 01:49:59 +01:00
2 changed files with 37 additions and 2 deletions

View File

@@ -1,4 +1,4 @@
from autogpt_server.blocks import agent, sample, reddit, text, ai, wikipedia, discord
from autogpt_server.blocks import agent, sample, reddit, text, ai, wikipedia, discord, web_scraper
from autogpt_server.data.block import Block
AVAILABLE_BLOCKS = {
@@ -6,4 +6,4 @@ AVAILABLE_BLOCKS = {
for block in [v() for v in Block.__subclasses__()]
}
__all__ = ["agent", "ai", "sample", "reddit", "text", "AVAILABLE_BLOCKS", "wikipedia", "discord"]
__all__ = ["agent", "ai", "sample", "reddit", "text", "AVAILABLE_BLOCKS", "wikipedia", "discord", "web_scraper"]

View File

@@ -0,0 +1,35 @@
import requests
from autogpt_server.data.block import Block, BlockSchema, BlockOutput
class WebScraper(Block):
    """Block that fetches the content of a web page via the Jina-ai Reader.

    The input URL is appended to ``https://r.jina.ai/`` and the text of the
    HTTP response is emitted on the ``content`` output.
    """

    class Input(BlockSchema):
        url: str  # The URL to scrape

    class Output(BlockSchema):
        content: str  # The scraped content from the URL

    def __init__(self):
        super().__init__(
            # Unique ID for the block.
            # NOTE(review): this value contains non-hex characters (g-p), so
            # it is not a valid UUID. Left unchanged because it is an external
            # identifier -- confirm whether block IDs must be UUIDs.
            id="a1b2c3d4-5e6f-7g8h-9i0j-k1l2m3n4o5p6",
            input_schema=WebScraper.Input,
            output_schema=WebScraper.Output,
            test_input={"url": "https://en.wikipedia.org/wiki/Artificial_intelligence"},
            test_output={"content": "Artificial intelligence (AI) is intelligence..."},
        )

    def run(self, input_data: Input) -> BlockOutput:
        """Scrape ``input_data.url`` through the Jina-ai Reader.

        Yields:
            A ``("content", text)`` pair holding the response body.

        Raises:
            ValueError: if the reader answers with an HTTP error status, or
                if the request itself fails (connection error, timeout, ...).
                The underlying ``requests`` exception is attached as the
                cause.
        """
        # Prepend the Jina-ai Reader URL to the input URL
        jina_url = f"https://r.jina.ai/{input_data.url}"

        try:
            # Without a timeout, requests waits forever on a stalled
            # connection, which would hang the whole block execution.
            response = requests.get(jina_url, timeout=30)
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            raise ValueError(f"HTTP error occurred: {http_err}") from http_err
        except requests.RequestException as e:
            raise ValueError(f"Request to Jina-ai Reader failed: {e}") from e

        # Output the scraped content. Kept outside the try block so the
        # handlers above cover only the network call itself.
        yield "content", response.text