mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
## Summary

This PR fixes and enhances the Exa Websets implementation to resolve issues with the expand_items parameter and improve the overall block functionality. The changes address UI limitations with nested response objects while providing a more comprehensive and user-friendly interface for creating and managing Exa websets.

[Websets_v14.json](https://github.com/user-attachments/files/21596313/Websets_v14.json)

<img width="1335" height="949" alt="Screenshot 2025-08-05 at 11 45 07" src="https://github.com/user-attachments/assets/3a9b3da0-3950-4388-96b2-e5dfa9df9b67" />

**Why these changes are necessary:**

1. **UI Compatibility**: The current implementation returns deeply nested objects that cause the UI to crash. This PR flattens the input parameters and returns simplified response objects to work around these UI limitations.
2. **Expand Items Issue**: The `expand_items` toggle in the GetWebset block was causing failures. This parameter has been removed as it's not essential for the basic functionality.
3. **Missing SDK Integration**: The previous implementation used raw HTTP requests instead of the official Exa SDK, making it harder to maintain and more prone to errors.
4. **Limited Functionality**: The original implementation lacked support for many Exa API features like imports, enrichments, and scope configuration.

### Changes 🏗️

<!-- Concisely describe all of the changes made in this pull request: -->

1. **Added Pydantic models** (`model.py`):
   - Created comprehensive type definitions for all Exa webset objects
   - Added proper enums for status values and types
   - Structured models to match the Exa API response format
2. **Refactored websets.py**:
   - Replaced raw HTTP requests with the official `exa-py` SDK
   - Flattened nested input parameters to avoid UI issues with complex objects
   - Enhanced `ExaCreateWebsetBlock` with support for:
     - Search configuration with entity types, criteria, exclude/scope sources
     - Import functionality from existing sources
     - Enrichment configuration with multiple formats
   - Removed problematic `expand_items` parameter from `ExaGetWebsetBlock`
   - Updated response objects to use simplified `Webset` model that returns dicts for nested objects
3. **Updated webhook_blocks.py**:
   - Disabled the webhook block temporarily (`disabled=True`) as it needs further testing
4. **Added exa-py dependency**:
   - Added official Exa Python SDK to `pyproject.toml` and `poetry.lock`

### Checklist 📋

#### For code changes:

- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  <!-- Put your test plan here: -->
  - [x] Created a new webset using the ExaCreateWebsetBlock with basic search parameters
  - [x] Verified the webset was created successfully in the Exa dashboard
  - [x] Listed websets using ExaListWebsetsBlock and confirmed pagination works
  - [x] Retrieved individual webset details using ExaGetWebsetBlock without expand_items
  - [x] Tested advanced features including entity types, criteria, and exclude sources
  - [x] Confirmed the UI no longer crashes when displaying webset responses
  - [x] Verified the Docker environment builds successfully with the new exa-py dependency

#### For configuration changes:

- [x] `.env.example` is updated or already compatible with my changes
- [x] `docker-compose.yml` is updated or already compatible with my changes
- [x] I have included a list of my configuration changes in the PR description (under **Changes**)
  - Added `exa-py` dependency to backend requirements

### Additional Notes

- The webhook functionality has been temporarily disabled pending further testing and UI improvements
- The flattened parameter approach is a workaround for current UI limitations with nested objects
- Future improvements could include re-enabling nested objects once the UI supports them better
248 lines
5.5 KiB
Python
248 lines
5.5 KiB
Python
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# Enum definitions based on available options
|
|
class WebsetStatus(str, Enum):
    """Lifecycle status of a webset.

    Members are also ``str`` instances, so they compare equal to their raw
    string values and can be looked up by value, e.g. ``WebsetStatus("idle")``.
    """

    IDLE = "idle"
    PENDING = "pending"
    RUNNING = "running"
    PAUSED = "paused"
|
|
|
|
|
|
class WebsetSearchStatus(str, Enum):
    """Status of a webset search.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    CREATED = "created"
    # TODO: only "created" is modelled (taken from an example response);
    # add further statuses once they are known.
|
|
|
|
|
|
class ImportStatus(str, Enum):
    """Status of an import.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    PENDING = "pending"
    # TODO: add further statuses once they are known.
|
|
|
|
|
|
class ImportFormat(str, Enum):
    """File format of an import.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    CSV = "csv"
    # TODO: add further formats once they are known.
|
|
|
|
|
|
class EnrichmentStatus(str, Enum):
    """Status of a webset enrichment.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    PENDING = "pending"
    # TODO: add further statuses once they are known.
|
|
|
|
|
|
class EnrichmentFormat(str, Enum):
    """Output format of a webset enrichment.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    TEXT = "text"
    # TODO: add further formats once they are known.
|
|
|
|
|
|
class MonitorStatus(str, Enum):
    """Status of a monitor.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    ENABLED = "enabled"
    # TODO: add further statuses once they are known.
|
|
|
|
|
|
class MonitorBehaviorType(str, Enum):
    """Kind of behavior a monitor performs.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    SEARCH = "search"
    # TODO: add further behavior types once they are known.
|
|
|
|
|
|
class MonitorRunStatus(str, Enum):
    """Status of a single monitor run.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    CREATED = "created"
    # TODO: add further statuses once they are known.
|
|
|
|
|
|
class CanceledReason(str, Enum):
    """Reason a search was canceled.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    WEBSET_DELETED = "webset_deleted"
    # TODO: add further reasons once they are known.
|
|
|
|
|
|
class FailedReason(str, Enum):
    """Reason an import failed.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    INVALID_FORMAT = "invalid_format"
    # TODO: add further reasons once they are known.
|
|
|
|
|
|
class Confidence(str, Enum):
    """Confidence level attached to an estimate.

    Members are also ``str`` instances and compare equal to their raw values.
    """

    HIGH = "high"
    # TODO: add further levels once they are known.
|
|
|
|
|
|
# Nested models
|
|
|
|
|
|
class Entity(BaseModel):
    """Entity descriptor holding a single required ``type`` string."""

    # Free-form string; it is not validated against a fixed set of values here.
    type: str
|
|
|
|
|
|
class Criterion(BaseModel):
    """A single criterion: a required description and an optional success rate."""

    description: str
    # Optional; stays None when the payload does not provide it.
    successRate: Optional[int] = None
|
|
|
|
|
|
class ExcludeItem(BaseModel):
    """Reference to a source that is excluded, identified by source kind and id."""

    # Source kind; falls back to "import" when omitted.
    source: str = Field(default="import")
    # Identifier of the referenced source (required).
    id: str
|
|
|
|
|
|
class Relationship(BaseModel):
    """Relationship descriptor: a required definition and an optional limit."""

    definition: str
    # Optional numeric limit; typed as float, so non-integer values are accepted.
    limit: Optional[float] = None
|
|
|
|
|
|
class ScopeItem(BaseModel):
    """Reference to a source that scopes a search, with an optional relationship."""

    # Source kind; falls back to "import" when omitted.
    source: str = Field(default="import")
    # Identifier of the referenced source (required).
    id: str
    # Optional nested relationship; None when not provided.
    relationship: Optional[Relationship] = None
|
|
|
|
|
|
class Progress(BaseModel):
    """Progress counters of a search; all four fields are required integers."""

    # NOTE(review): every field is a required ``int``. Confirm the API never
    # sends null or fractional values here (e.g. for completion or timeLeft).
    found: int
    analyzed: int
    completion: int
    timeLeft: int
|
|
|
|
|
|
class Bounds(BaseModel):
    """Integer lower and upper bound pair."""

    # The field names shadow the ``min``/``max`` builtins inside the class body
    # only; they are kept because they are the model's public field names.
    min: int
    max: int
|
|
|
|
|
|
class Expected(BaseModel):
    """Expected total together with a confidence label and its bounds."""

    total: int
    # Kept as a plain str defaulting to "high"; the Confidence enum could be
    # used instead.
    confidence: str = Field(default="high")
    bounds: Bounds
|
|
|
|
|
|
class Recall(BaseModel):
    """Recall information: an ``Expected`` estimate plus free-text reasoning."""

    expected: Expected
    reasoning: str
|
|
|
|
|
|
class WebsetSearch(BaseModel):
    """A search belonging to a webset.

    Field names are camelCase; per the accompanying change description the
    models are structured to match the Exa API response format.
    """

    id: str
    # Object-type discriminator; defaults to "webset_search".
    object: str = Field(default="webset_search")
    # Kept as a plain str defaulting to "created"; WebsetSearchStatus could be
    # used instead.
    status: str = Field(default="created")
    websetId: str
    query: str
    entity: Entity
    criteria: List[Criterion]
    count: int
    behavior: str = Field(default="override")
    # Both lists are required (no default), even if empty.
    exclude: List[ExcludeItem]
    scope: List[ScopeItem]
    progress: Progress
    recall: Recall
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
    canceledAt: Optional[datetime] = None
    # Kept as an optional plain str; CanceledReason could be used instead.
    canceledReason: Optional[str] = Field(default=None)
    createdAt: datetime
    updatedAt: datetime
|
|
|
|
|
|
class ImportEntity(BaseModel):
    """Entity descriptor of an import, holding a single required ``type`` string."""

    # Free-form string; it is not validated against a fixed set of values here.
    type: str
|
|
|
|
|
|
class Import(BaseModel):
    """An import attached to a webset, including optional failure details."""

    id: str
    # Object-type discriminator; defaults to "import".
    object: str = Field(default="import")
    # Kept as a plain str defaulting to "pending"; ImportStatus could be used
    # instead.
    status: str = Field(default="pending")
    # Kept as a plain str defaulting to "csv"; ImportFormat could be used
    # instead.
    format: str = Field(default="csv")
    entity: ImportEntity
    title: str
    count: int
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
    # Failure details are all optional and default to None.
    # failedReason is kept as a plain str; FailedReason could be used instead.
    failedReason: Optional[str] = Field(default=None)
    failedAt: Optional[datetime] = None
    failedMessage: Optional[str] = None
    createdAt: datetime
    updatedAt: datetime
|
|
|
|
|
|
class Option(BaseModel):
    """A single option, identified only by its required label."""

    label: str
|
|
|
|
|
|
class WebsetEnrichment(BaseModel):
    """An enrichment attached to a webset."""

    id: str
    # Object-type discriminator; defaults to "webset_enrichment".
    object: str = Field(default="webset_enrichment")
    # Kept as a plain str defaulting to "pending"; EnrichmentStatus could be
    # used instead.
    status: str = Field(default="pending")
    websetId: str
    title: str
    description: str
    # Kept as a plain str defaulting to "text"; EnrichmentFormat could be used
    # instead.
    format: str = Field(default="text")
    # Required list (no default), even if empty.
    options: List[Option]
    instructions: str
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
    createdAt: datetime
    updatedAt: datetime
|
|
|
|
|
|
class Cadence(BaseModel):
    """Schedule of a monitor: a cron string and a timezone name."""

    # Stored as a plain string; no syntax validation is performed here.
    cron: str
    # Falls back to "Etc/UTC" when omitted.
    timezone: str = Field(default="Etc/UTC")
|
|
|
|
|
|
class BehaviorConfig(BaseModel):
    """Configuration of a monitor behavior; every field is optional (None by default)."""

    query: Optional[str] = None
    criteria: Optional[List[Criterion]] = None
    entity: Optional[Entity] = None
    count: Optional[int] = None
    behavior: Optional[str] = Field(default=None)
|
|
|
|
|
|
class Behavior(BaseModel):
    """Behavior of a monitor: a type label plus its required configuration."""

    # Kept as a plain str defaulting to "search"; MonitorBehaviorType could be
    # used instead.
    type: str = Field(default="search")
    config: BehaviorConfig
|
|
|
|
|
|
class MonitorRun(BaseModel):
    """A single run of a monitor, with optional completion/failure/cancel info."""

    id: str
    # Object-type discriminator; defaults to "monitor_run".
    object: str = Field(default="monitor_run")
    # Kept as a plain str defaulting to "created"; MonitorRunStatus could be
    # used instead.
    status: str = Field(default="created")
    monitorId: str
    type: str = Field(default="search")
    # Outcome fields are all optional and default to None.
    completedAt: Optional[datetime] = None
    failedAt: Optional[datetime] = None
    failedReason: Optional[str] = None
    canceledAt: Optional[datetime] = None
    createdAt: datetime
    updatedAt: datetime
|
|
|
|
|
|
class Monitor(BaseModel):
    """A monitor attached to a webset, with its cadence, behavior and last run."""

    id: str
    # Object-type discriminator; defaults to "monitor".
    object: str = Field(default="monitor")
    # Kept as a plain str defaulting to "enabled"; MonitorStatus could be used
    # instead.
    status: str = Field(default="enabled")
    websetId: str
    cadence: Cadence
    behavior: Behavior
    # Both are optional and default to None.
    lastRun: Optional[MonitorRun] = None
    nextRunAt: Optional[datetime] = None
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
    createdAt: datetime
    updatedAt: datetime
|
|
|
|
|
|
class Webset(BaseModel):
    """Top-level webset object aggregating searches, imports, enrichments and monitors."""

    id: str
    # Object-type discriminator; defaults to "webset".
    object: str = Field(default="webset")
    # Unlike the nested models, this status is validated against WebsetStatus.
    status: WebsetStatus
    externalId: Optional[str] = None
    title: Optional[str] = None
    # All collection fields are required (no default), even if empty.
    searches: List[WebsetSearch]
    imports: List[Import]
    enrichments: List[WebsetEnrichment]
    monitors: List[Monitor]
    # Untyped elements: no model is defined for stream entries.
    streams: List[Any]
    createdAt: datetime
    updatedAt: datetime
    # default_factory gives each instance its own empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
class ListWebsets(BaseModel):
    """One page of websets plus pagination information."""

    data: List[Webset]
    hasMore: bool
    # Optional cursor; None when not provided.
    nextCursor: Optional[str] = None
|