mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(blocks): update exa websets implementation (#10521)
## Summary This PR fixes and enhances the Exa Websets implementation to resolve issues with the expand_items parameter and improve the overall block functionality. The changes address UI limitations with nested response objects while providing a more comprehensive and user-friendly interface for creating and managing Exa websets. [Websets_v14.json](https://github.com/user-attachments/files/21596313/Websets_v14.json) <img width="1335" height="949" alt="Screenshot 2025-08-05 at 11 45 07" src="https://github.com/user-attachments/assets/3a9b3da0-3950-4388-96b2-e5dfa9df9b67" /> **Why these changes are necessary:** 1. **UI Compatibility**: The current implementation returns deeply nested objects that cause the UI to crash. This PR flattens the input parameters and returns simplified response objects to work around these UI limitations. 2. **Expand Items Issue**: The `expand_items` toggle in the GetWebset block was causing failures. This parameter has been removed as it's not essential for the basic functionality. 3. **Missing SDK Integration**: The previous implementation used raw HTTP requests instead of the official Exa SDK, making it harder to maintain and more prone to errors. 4. **Limited Functionality**: The original implementation lacked support for many Exa API features like imports, enrichments, and scope configuration. ### Changes 🏗️ <\!-- Concisely describe all of the changes made in this pull request: --> 1. **Added Pydantic models** (`model.py`): - Created comprehensive type definitions for all Exa webset objects - Added proper enums for status values and types - Structured models to match the Exa API response format 2. **Refactored websets.py**: - Replaced raw HTTP requests with the official `exa-py` SDK - Flattened nested input parameters to avoid UI issues with complex objects - Enhanced `ExaCreateWebsetBlock` with support for: - Search configuration with entity types, criteria, exclude/scope sources - Import functionality from existing sources - Enrichment configuration with multiple formats - Removed problematic `expand_items` parameter from `ExaGetWebsetBlock` - Updated response objects to use simplified `Webset` model that returns dicts for nested objects 3. **Updated webhook_blocks.py**: - Disabled the webhook block temporarily (`disabled=True`) as it needs further testing 4. **Added exa-py dependency**: - Added official Exa Python SDK to `pyproject.toml` and `poetry.lock` ### Checklist 📋 #### For code changes: - [x] I have clearly listed my changes in the PR description - [x] I have made a test plan - [x] I have tested my changes according to the test plan: <\!-- Put your test plan here: --> - [x] Created a new webset using the ExaCreateWebsetBlock with basic search parameters - [x] Verified the webset was created successfully in the Exa dashboard - [x] Listed websets using ExaListWebsetsBlock and confirmed pagination works - [x] Retrieved individual webset details using ExaGetWebsetBlock without expand_items - [x] Tested advanced features including entity types, criteria, and exclude sources - [x] Confirmed the UI no longer crashes when displaying webset responses - [x] Verified the Docker environment builds successfully with the new exa-py dependency #### For configuration changes: - [x] `.env.example` is updated or already compatible with my changes - [x] `docker-compose.yml` is updated or already compatible with my changes - [x] I have included a list of my configuration changes in the PR description (under **Changes**) - Added `exa-py` dependency to backend requirements ### Additional Notes - The webhook functionality has been temporarily disabled pending further testing and UI improvements - The flattened parameter approach is a workaround for current UI limitations with nested objects - Future improvements could include re-enabling nested objects once the UI supports them better
This commit is contained in:
247
autogpt_platform/backend/backend/blocks/exa/model.py
Normal file
247
autogpt_platform/backend/backend/blocks/exa/model.py
Normal file
@@ -0,0 +1,247 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Enum definitions based on available options
|
||||
class WebsetStatus(str, Enum):
|
||||
IDLE = "idle"
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
PAUSED = "paused"
|
||||
|
||||
|
||||
class WebsetSearchStatus(str, Enum):
|
||||
CREATED = "created"
|
||||
# Add more if known, based on example it's "created"
|
||||
|
||||
|
||||
class ImportStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class ImportFormat(str, Enum):
|
||||
CSV = "csv"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class EnrichmentStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class EnrichmentFormat(str, Enum):
|
||||
TEXT = "text"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class MonitorStatus(str, Enum):
|
||||
ENABLED = "enabled"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class MonitorBehaviorType(str, Enum):
|
||||
SEARCH = "search"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class MonitorRunStatus(str, Enum):
|
||||
CREATED = "created"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class CanceledReason(str, Enum):
|
||||
WEBSET_DELETED = "webset_deleted"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class FailedReason(str, Enum):
|
||||
INVALID_FORMAT = "invalid_format"
|
||||
# Add more if known
|
||||
|
||||
|
||||
class Confidence(str, Enum):
|
||||
HIGH = "high"
|
||||
# Add more if known
|
||||
|
||||
|
||||
# Nested models
|
||||
|
||||
|
||||
class Entity(BaseModel):
|
||||
type: str
|
||||
|
||||
|
||||
class Criterion(BaseModel):
|
||||
description: str
|
||||
successRate: Optional[int] = None
|
||||
|
||||
|
||||
class ExcludeItem(BaseModel):
|
||||
source: str = Field(default="import")
|
||||
id: str
|
||||
|
||||
|
||||
class Relationship(BaseModel):
|
||||
definition: str
|
||||
limit: Optional[float] = None
|
||||
|
||||
|
||||
class ScopeItem(BaseModel):
|
||||
source: str = Field(default="import")
|
||||
id: str
|
||||
relationship: Optional[Relationship] = None
|
||||
|
||||
|
||||
class Progress(BaseModel):
|
||||
found: int
|
||||
analyzed: int
|
||||
completion: int
|
||||
timeLeft: int
|
||||
|
||||
|
||||
class Bounds(BaseModel):
|
||||
min: int
|
||||
max: int
|
||||
|
||||
|
||||
class Expected(BaseModel):
|
||||
total: int
|
||||
confidence: str = Field(default="high") # Use str or Confidence enum
|
||||
bounds: Bounds
|
||||
|
||||
|
||||
class Recall(BaseModel):
|
||||
expected: Expected
|
||||
reasoning: str
|
||||
|
||||
|
||||
class WebsetSearch(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="webset_search")
|
||||
status: str = Field(default="created") # Or use WebsetSearchStatus
|
||||
websetId: str
|
||||
query: str
|
||||
entity: Entity
|
||||
criteria: List[Criterion]
|
||||
count: int
|
||||
behavior: str = Field(default="override")
|
||||
exclude: List[ExcludeItem]
|
||||
scope: List[ScopeItem]
|
||||
progress: Progress
|
||||
recall: Recall
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
canceledAt: Optional[datetime] = None
|
||||
canceledReason: Optional[str] = Field(default=None) # Or use CanceledReason
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
|
||||
|
||||
class ImportEntity(BaseModel):
|
||||
type: str
|
||||
|
||||
|
||||
class Import(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="import")
|
||||
status: str = Field(default="pending") # Or use ImportStatus
|
||||
format: str = Field(default="csv") # Or use ImportFormat
|
||||
entity: ImportEntity
|
||||
title: str
|
||||
count: int
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
failedReason: Optional[str] = Field(default=None) # Or use FailedReason
|
||||
failedAt: Optional[datetime] = None
|
||||
failedMessage: Optional[str] = None
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
|
||||
|
||||
class Option(BaseModel):
|
||||
label: str
|
||||
|
||||
|
||||
class WebsetEnrichment(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="webset_enrichment")
|
||||
status: str = Field(default="pending") # Or use EnrichmentStatus
|
||||
websetId: str
|
||||
title: str
|
||||
description: str
|
||||
format: str = Field(default="text") # Or use EnrichmentFormat
|
||||
options: List[Option]
|
||||
instructions: str
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
|
||||
|
||||
class Cadence(BaseModel):
|
||||
cron: str
|
||||
timezone: str = Field(default="Etc/UTC")
|
||||
|
||||
|
||||
class BehaviorConfig(BaseModel):
|
||||
query: Optional[str] = None
|
||||
criteria: Optional[List[Criterion]] = None
|
||||
entity: Optional[Entity] = None
|
||||
count: Optional[int] = None
|
||||
behavior: Optional[str] = Field(default=None)
|
||||
|
||||
|
||||
class Behavior(BaseModel):
|
||||
type: str = Field(default="search") # Or use MonitorBehaviorType
|
||||
config: BehaviorConfig
|
||||
|
||||
|
||||
class MonitorRun(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="monitor_run")
|
||||
status: str = Field(default="created") # Or use MonitorRunStatus
|
||||
monitorId: str
|
||||
type: str = Field(default="search")
|
||||
completedAt: Optional[datetime] = None
|
||||
failedAt: Optional[datetime] = None
|
||||
failedReason: Optional[str] = None
|
||||
canceledAt: Optional[datetime] = None
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
|
||||
|
||||
class Monitor(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="monitor")
|
||||
status: str = Field(default="enabled") # Or use MonitorStatus
|
||||
websetId: str
|
||||
cadence: Cadence
|
||||
behavior: Behavior
|
||||
lastRun: Optional[MonitorRun] = None
|
||||
nextRunAt: Optional[datetime] = None
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
|
||||
|
||||
class Webset(BaseModel):
|
||||
id: str
|
||||
object: str = Field(default="webset")
|
||||
status: WebsetStatus
|
||||
externalId: Optional[str] = None
|
||||
title: Optional[str] = None
|
||||
searches: List[WebsetSearch]
|
||||
imports: List[Import]
|
||||
enrichments: List[WebsetEnrichment]
|
||||
monitors: List[Monitor]
|
||||
streams: List[Any]
|
||||
createdAt: datetime
|
||||
updatedAt: datetime
|
||||
metadata: Dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ListWebsets(BaseModel):
|
||||
data: List[Webset]
|
||||
hasMore: bool
|
||||
nextCursor: Optional[str] = None
|
||||
@@ -114,6 +114,7 @@ class ExaWebsetWebhookBlock(Block):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
disabled=True,
|
||||
id="d0204ed8-8b81-408d-8b8d-ed087a546228",
|
||||
description="Receive webhook notifications for Exa webset events",
|
||||
categories={BlockCategory.INPUT},
|
||||
|
||||
@@ -1,7 +1,33 @@
|
||||
from typing import Any, Optional
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Annotated, Any, Dict, List, Optional
|
||||
|
||||
from exa_py import Exa
|
||||
from exa_py.websets.types import (
|
||||
CreateCriterionParameters,
|
||||
CreateEnrichmentParameters,
|
||||
CreateWebsetParameters,
|
||||
CreateWebsetParametersSearch,
|
||||
ExcludeItem,
|
||||
Format,
|
||||
ImportItem,
|
||||
ImportSource,
|
||||
Option,
|
||||
ScopeItem,
|
||||
ScopeRelationship,
|
||||
ScopeSourceType,
|
||||
WebsetArticleEntity,
|
||||
WebsetCompanyEntity,
|
||||
WebsetCustomEntity,
|
||||
WebsetPersonEntity,
|
||||
WebsetResearchPaperEntity,
|
||||
WebsetStatus,
|
||||
)
|
||||
from pydantic import Field
|
||||
|
||||
from backend.sdk import (
|
||||
APIKeyCredentials,
|
||||
BaseModel,
|
||||
Block,
|
||||
BlockCategory,
|
||||
BlockOutput,
|
||||
@@ -12,7 +38,69 @@ from backend.sdk import (
|
||||
)
|
||||
|
||||
from ._config import exa
|
||||
from .helpers import WebsetEnrichmentConfig, WebsetSearchConfig
|
||||
|
||||
|
||||
class SearchEntityType(str, Enum):
|
||||
COMPANY = "company"
|
||||
PERSON = "person"
|
||||
ARTICLE = "article"
|
||||
RESEARCH_PAPER = "research_paper"
|
||||
CUSTOM = "custom"
|
||||
AUTO = "auto"
|
||||
|
||||
|
||||
class SearchType(str, Enum):
|
||||
IMPORT = "import"
|
||||
WEBSET = "webset"
|
||||
|
||||
|
||||
class EnrichmentFormat(str, Enum):
|
||||
TEXT = "text"
|
||||
DATE = "date"
|
||||
NUMBER = "number"
|
||||
OPTIONS = "options"
|
||||
EMAIL = "email"
|
||||
PHONE = "phone"
|
||||
|
||||
|
||||
class Webset(BaseModel):
|
||||
id: str
|
||||
status: WebsetStatus | None = Field(..., title="WebsetStatus")
|
||||
"""
|
||||
The status of the webset
|
||||
"""
|
||||
external_id: Annotated[Optional[str], Field(alias="externalId")] = None
|
||||
"""
|
||||
The external identifier for the webset
|
||||
NOTE: Returning dict to avoid ui crashing due to nested objects
|
||||
"""
|
||||
searches: List[dict[str, Any]] | None = None
|
||||
"""
|
||||
The searches that have been performed on the webset.
|
||||
NOTE: Returning dict to avoid ui crashing due to nested objects
|
||||
"""
|
||||
enrichments: List[dict[str, Any]] | None = None
|
||||
"""
|
||||
The Enrichments to apply to the Webset Items.
|
||||
NOTE: Returning dict to avoid ui crashing due to nested objects
|
||||
"""
|
||||
monitors: List[dict[str, Any]] | None = None
|
||||
"""
|
||||
The Monitors for the Webset.
|
||||
NOTE: Returning dict to avoid ui crashing due to nested objects
|
||||
"""
|
||||
metadata: Optional[Dict[str, Any]] = {}
|
||||
"""
|
||||
Set of key-value pairs you want to associate with this object.
|
||||
"""
|
||||
created_at: Annotated[datetime, Field(alias="createdAt")] | None = None
|
||||
"""
|
||||
The date and time the webset was created
|
||||
"""
|
||||
updated_at: Annotated[datetime, Field(alias="updatedAt")] | None = None
|
||||
"""
|
||||
The date and time the webset was last updated
|
||||
"""
|
||||
|
||||
|
||||
class ExaCreateWebsetBlock(Block):
|
||||
@@ -20,40 +108,121 @@ class ExaCreateWebsetBlock(Block):
|
||||
credentials: CredentialsMetaInput = exa.credentials_field(
|
||||
description="The Exa integration requires an API Key."
|
||||
)
|
||||
search: WebsetSearchConfig = SchemaField(
|
||||
description="Initial search configuration for the Webset"
|
||||
|
||||
# Search parameters (flattened)
|
||||
search_query: str = SchemaField(
|
||||
description="Your search query. Use this to describe what you are looking for. Any URL provided will be crawled and used as context for the search.",
|
||||
placeholder="Marketing agencies based in the US, that focus on consumer products",
|
||||
)
|
||||
enrichments: Optional[list[WebsetEnrichmentConfig]] = SchemaField(
|
||||
default=None,
|
||||
description="Enrichments to apply to Webset items",
|
||||
search_count: Optional[int] = SchemaField(
|
||||
default=10,
|
||||
description="Number of items the search will attempt to find. The actual number of items found may be less than this number depending on the search complexity.",
|
||||
ge=1,
|
||||
le=1000,
|
||||
)
|
||||
search_entity_type: SearchEntityType = SchemaField(
|
||||
default=SearchEntityType.AUTO,
|
||||
description="Entity type: 'company', 'person', 'article', 'research_paper', or 'custom'. If not provided, we automatically detect the entity from the query.",
|
||||
advanced=True,
|
||||
)
|
||||
search_entity_description: Optional[str] = SchemaField(
|
||||
default=None,
|
||||
description="Description for custom entity type (required when search_entity_type is 'custom')",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Search criteria (flattened)
|
||||
search_criteria: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of criteria descriptions that every item will be evaluated against. If not provided, we automatically detect the criteria from the query.",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Search exclude sources (flattened)
|
||||
search_exclude_sources: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source IDs (imports or websets) to exclude from search results",
|
||||
advanced=True,
|
||||
)
|
||||
search_exclude_types: list[SearchType] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source types corresponding to exclude sources ('import' or 'webset')",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Search scope sources (flattened)
|
||||
search_scope_sources: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source IDs (imports or websets) to limit search scope to",
|
||||
advanced=True,
|
||||
)
|
||||
search_scope_types: list[SearchType] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source types corresponding to scope sources ('import' or 'webset')",
|
||||
advanced=True,
|
||||
)
|
||||
search_scope_relationships: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of relationship definitions for hop searches (optional, one per scope source)",
|
||||
advanced=True,
|
||||
)
|
||||
search_scope_relationship_limits: list[int] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of limits on the number of related entities to find (optional, one per scope relationship)",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Import parameters (flattened)
|
||||
import_sources: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source IDs to import from",
|
||||
advanced=True,
|
||||
)
|
||||
import_types: list[SearchType] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of source types corresponding to import sources ('import' or 'webset')",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Enrichment parameters (flattened)
|
||||
enrichment_descriptions: list[str] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of enrichment task descriptions to perform on each webset item",
|
||||
advanced=True,
|
||||
)
|
||||
enrichment_formats: list[EnrichmentFormat] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of formats for enrichment responses ('text', 'date', 'number', 'options', 'email', 'phone'). If not specified, we automatically select the best format.",
|
||||
advanced=True,
|
||||
)
|
||||
enrichment_options: list[list[str]] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of option lists for enrichments with 'options' format. Each inner list contains the option labels.",
|
||||
advanced=True,
|
||||
)
|
||||
enrichment_metadata: list[dict] = SchemaField(
|
||||
default_factory=list,
|
||||
description="List of metadata dictionaries for enrichments",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
# Webset metadata
|
||||
external_id: Optional[str] = SchemaField(
|
||||
default=None,
|
||||
description="External identifier for the webset",
|
||||
description="External identifier for the webset. You can use this to reference the webset by your own internal identifiers.",
|
||||
placeholder="my-webset-123",
|
||||
advanced=True,
|
||||
)
|
||||
metadata: Optional[dict] = SchemaField(
|
||||
default=None,
|
||||
default_factory=dict,
|
||||
description="Key-value pairs to associate with this webset",
|
||||
advanced=True,
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
webset_id: str = SchemaField(
|
||||
webset: Webset = SchemaField(
|
||||
description="The unique identifier for the created webset"
|
||||
)
|
||||
status: str = SchemaField(description="The status of the webset")
|
||||
external_id: Optional[str] = SchemaField(
|
||||
description="The external identifier for the webset", default=None
|
||||
)
|
||||
created_at: str = SchemaField(
|
||||
description="The date and time the webset was created"
|
||||
)
|
||||
error: str = SchemaField(
|
||||
description="Error message if the request failed", default=""
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
@@ -67,44 +236,171 @@ class ExaCreateWebsetBlock(Block):
|
||||
async def run(
|
||||
self, input_data: Input, *, credentials: APIKeyCredentials, **kwargs
|
||||
) -> BlockOutput:
|
||||
url = "https://api.exa.ai/websets/v0/websets"
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"x-api-key": credentials.api_key.get_secret_value(),
|
||||
}
|
||||
|
||||
# Build the payload
|
||||
payload: dict[str, Any] = {
|
||||
"search": input_data.search.model_dump(exclude_none=True),
|
||||
}
|
||||
exa = Exa(credentials.api_key.get_secret_value())
|
||||
|
||||
# Convert enrichments to API format
|
||||
if input_data.enrichments:
|
||||
enrichments_data = []
|
||||
for enrichment in input_data.enrichments:
|
||||
enrichments_data.append(enrichment.model_dump(exclude_none=True))
|
||||
payload["enrichments"] = enrichments_data
|
||||
# ------------------------------------------------------------
|
||||
# Build entity (if explicitly provided)
|
||||
# ------------------------------------------------------------
|
||||
entity = None
|
||||
if input_data.search_entity_type == SearchEntityType.COMPANY:
|
||||
entity = WebsetCompanyEntity(type="company")
|
||||
elif input_data.search_entity_type == SearchEntityType.PERSON:
|
||||
entity = WebsetPersonEntity(type="person")
|
||||
elif input_data.search_entity_type == SearchEntityType.ARTICLE:
|
||||
entity = WebsetArticleEntity(type="article")
|
||||
elif input_data.search_entity_type == SearchEntityType.RESEARCH_PAPER:
|
||||
entity = WebsetResearchPaperEntity(type="research_paper")
|
||||
elif (
|
||||
input_data.search_entity_type == SearchEntityType.CUSTOM
|
||||
and input_data.search_entity_description
|
||||
):
|
||||
entity = WebsetCustomEntity(
|
||||
type="custom", description=input_data.search_entity_description
|
||||
)
|
||||
|
||||
if input_data.external_id:
|
||||
payload["externalId"] = input_data.external_id
|
||||
# ------------------------------------------------------------
|
||||
# Build criteria list
|
||||
# ------------------------------------------------------------
|
||||
criteria = None
|
||||
if input_data.search_criteria:
|
||||
criteria = [
|
||||
CreateCriterionParameters(description=item)
|
||||
for item in input_data.search_criteria
|
||||
]
|
||||
|
||||
if input_data.metadata:
|
||||
payload["metadata"] = input_data.metadata
|
||||
# ------------------------------------------------------------
|
||||
# Build exclude sources list
|
||||
# ------------------------------------------------------------
|
||||
exclude_items = None
|
||||
if input_data.search_exclude_sources:
|
||||
exclude_items = []
|
||||
for idx, src_id in enumerate(input_data.search_exclude_sources):
|
||||
src_type = None
|
||||
if input_data.search_exclude_types and idx < len(
|
||||
input_data.search_exclude_types
|
||||
):
|
||||
src_type = input_data.search_exclude_types[idx]
|
||||
# Default to IMPORT if type missing
|
||||
if src_type == SearchType.WEBSET:
|
||||
source_enum = ImportSource.webset
|
||||
else:
|
||||
source_enum = ImportSource.import_
|
||||
exclude_items.append(ExcludeItem(source=source_enum, id=src_id))
|
||||
|
||||
try:
|
||||
response = await Requests().post(url, headers=headers, json=payload)
|
||||
data = response.json()
|
||||
# ------------------------------------------------------------
|
||||
# Build scope list
|
||||
# ------------------------------------------------------------
|
||||
scope_items = None
|
||||
if input_data.search_scope_sources:
|
||||
scope_items = []
|
||||
for idx, src_id in enumerate(input_data.search_scope_sources):
|
||||
src_type = None
|
||||
if input_data.search_scope_types and idx < len(
|
||||
input_data.search_scope_types
|
||||
):
|
||||
src_type = input_data.search_scope_types[idx]
|
||||
relationship = None
|
||||
if input_data.search_scope_relationships and idx < len(
|
||||
input_data.search_scope_relationships
|
||||
):
|
||||
rel_def = input_data.search_scope_relationships[idx]
|
||||
lim = None
|
||||
if input_data.search_scope_relationship_limits and idx < len(
|
||||
input_data.search_scope_relationship_limits
|
||||
):
|
||||
lim = input_data.search_scope_relationship_limits[idx]
|
||||
relationship = ScopeRelationship(definition=rel_def, limit=lim)
|
||||
if src_type == SearchType.WEBSET:
|
||||
src_enum = ScopeSourceType.webset
|
||||
else:
|
||||
src_enum = ScopeSourceType.import_
|
||||
scope_items.append(
|
||||
ScopeItem(source=src_enum, id=src_id, relationship=relationship)
|
||||
)
|
||||
|
||||
yield "webset_id", data.get("id", "")
|
||||
yield "status", data.get("status", "")
|
||||
yield "external_id", data.get("externalId")
|
||||
yield "created_at", data.get("createdAt", "")
|
||||
# ------------------------------------------------------------
|
||||
# Assemble search parameters (only if a query is provided)
|
||||
# ------------------------------------------------------------
|
||||
search_params = None
|
||||
if input_data.search_query:
|
||||
search_params = CreateWebsetParametersSearch(
|
||||
query=input_data.search_query,
|
||||
count=input_data.search_count,
|
||||
entity=entity,
|
||||
criteria=criteria,
|
||||
exclude=exclude_items,
|
||||
scope=scope_items,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
yield "error", str(e)
|
||||
yield "webset_id", ""
|
||||
yield "status", ""
|
||||
yield "created_at", ""
|
||||
# ------------------------------------------------------------
|
||||
# Build imports list
|
||||
# ------------------------------------------------------------
|
||||
imports_params = None
|
||||
if input_data.import_sources:
|
||||
imports_params = []
|
||||
for idx, src_id in enumerate(input_data.import_sources):
|
||||
src_type = None
|
||||
if input_data.import_types and idx < len(input_data.import_types):
|
||||
src_type = input_data.import_types[idx]
|
||||
if src_type == SearchType.WEBSET:
|
||||
source_enum = ImportSource.webset
|
||||
else:
|
||||
source_enum = ImportSource.import_
|
||||
imports_params.append(ImportItem(source=source_enum, id=src_id))
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Build enrichment list
|
||||
# ------------------------------------------------------------
|
||||
enrichments_params = None
|
||||
if input_data.enrichment_descriptions:
|
||||
enrichments_params = []
|
||||
for idx, desc in enumerate(input_data.enrichment_descriptions):
|
||||
fmt = None
|
||||
if input_data.enrichment_formats and idx < len(
|
||||
input_data.enrichment_formats
|
||||
):
|
||||
fmt_enum = input_data.enrichment_formats[idx]
|
||||
if fmt_enum is not None:
|
||||
fmt = Format(
|
||||
fmt_enum.value if isinstance(fmt_enum, Enum) else fmt_enum
|
||||
)
|
||||
options_list = None
|
||||
if input_data.enrichment_options and idx < len(
|
||||
input_data.enrichment_options
|
||||
):
|
||||
raw_opts = input_data.enrichment_options[idx]
|
||||
if raw_opts:
|
||||
options_list = [Option(label=o) for o in raw_opts]
|
||||
metadata_obj = None
|
||||
if input_data.enrichment_metadata and idx < len(
|
||||
input_data.enrichment_metadata
|
||||
):
|
||||
metadata_obj = input_data.enrichment_metadata[idx]
|
||||
enrichments_params.append(
|
||||
CreateEnrichmentParameters(
|
||||
description=desc,
|
||||
format=fmt,
|
||||
options=options_list,
|
||||
metadata=metadata_obj,
|
||||
)
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Create the webset
|
||||
# ------------------------------------------------------------
|
||||
webset = exa.websets.create(
|
||||
params=CreateWebsetParameters(
|
||||
search=search_params,
|
||||
imports=imports_params,
|
||||
enrichments=enrichments_params,
|
||||
external_id=input_data.external_id,
|
||||
metadata=input_data.metadata,
|
||||
)
|
||||
)
|
||||
|
||||
# Use alias field names returned from Exa SDK so that nested models validate correctly
|
||||
yield "webset", Webset.model_validate(webset.model_dump(by_alias=True))
|
||||
|
||||
|
||||
class ExaUpdateWebsetBlock(Block):
|
||||
@@ -183,6 +479,11 @@ class ExaListWebsetsBlock(Block):
|
||||
credentials: CredentialsMetaInput = exa.credentials_field(
|
||||
description="The Exa integration requires an API Key."
|
||||
)
|
||||
trigger: Any | None = SchemaField(
|
||||
default=None,
|
||||
description="Trigger for the webset, value is ignored!",
|
||||
advanced=False,
|
||||
)
|
||||
cursor: Optional[str] = SchemaField(
|
||||
default=None,
|
||||
description="Cursor for pagination through results",
|
||||
@@ -197,7 +498,9 @@ class ExaListWebsetsBlock(Block):
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
websets: list = SchemaField(description="List of websets", default_factory=list)
|
||||
websets: list[Webset] = SchemaField(
|
||||
description="List of websets", default_factory=list
|
||||
)
|
||||
has_more: bool = SchemaField(
|
||||
description="Whether there are more results to paginate through",
|
||||
default=False,
|
||||
@@ -255,9 +558,6 @@ class ExaGetWebsetBlock(Block):
|
||||
description="The ID or external ID of the Webset to retrieve",
|
||||
placeholder="webset-id-or-external-id",
|
||||
)
|
||||
expand_items: bool = SchemaField(
|
||||
default=False, description="Include items in the response", advanced=True
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
webset_id: str = SchemaField(description="The unique identifier for the webset")
|
||||
@@ -309,12 +609,8 @@ class ExaGetWebsetBlock(Block):
|
||||
"x-api-key": credentials.api_key.get_secret_value(),
|
||||
}
|
||||
|
||||
params = {}
|
||||
if input_data.expand_items:
|
||||
params["expand[]"] = "items"
|
||||
|
||||
try:
|
||||
response = await Requests().get(url, headers=headers, params=params)
|
||||
response = await Requests().get(url, headers=headers)
|
||||
data = response.json()
|
||||
|
||||
yield "webset_id", data.get("id", "")
|
||||
|
||||
21
autogpt_platform/backend/poetry.lock
generated
21
autogpt_platform/backend/poetry.lock
generated
@@ -1079,6 +1079,25 @@ files = [
|
||||
dnspython = ">=2.0.0"
|
||||
idna = ">=2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "exa-py"
|
||||
version = "1.14.20"
|
||||
description = "Python SDK for Exa API."
|
||||
optional = false
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "exa_py-1.14.20-py3-none-any.whl", hash = "sha256:e0ed9d99c3c494a0e6903e11a0f6fb773b3b23d0cd802380cf58efc97d9d332d"},
|
||||
{file = "exa_py-1.14.20.tar.gz", hash = "sha256:423789a0635b7a4ecd5f56d6b4a0dfb01126fa45ce1e04106c0bb96b7d551ebf"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
httpx = ">=0.28.1"
|
||||
openai = ">=1.48"
|
||||
pydantic = ">=2.10.6"
|
||||
requests = ">=2.32.3"
|
||||
typing-extensions = ">=4.12.2"
|
||||
|
||||
[[package]]
|
||||
name = "exceptiongroup"
|
||||
version = "1.3.0"
|
||||
@@ -6718,4 +6737,4 @@ cffi = ["cffi (>=1.11)"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.10,<3.13"
|
||||
content-hash = "225ddae645d22cc57f46330e735c069fb52e708123aa642e74adbf077dda0796"
|
||||
content-hash = "05e2b99bd6dc5a74a89df0e1e504853b66bd519062159b0de5fbedf6a1f4d986"
|
||||
|
||||
@@ -75,6 +75,7 @@ setuptools = "^80.9.0"
|
||||
gcloud-aio-storage = "^9.5.0"
|
||||
pandas = "^2.3.1"
|
||||
firecrawl-py = "^2.16.3"
|
||||
exa-py = "^1.14.20"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
aiohappyeyeballs = "^2.6.1"
|
||||
|
||||
Reference in New Issue
Block a user