mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
feat(classic): add utility components for agent capabilities
Add 6 new utility components to expand agent functionality:

- ArchiveHandlerComponent: ZIP/TAR archive operations (create, extract, list)
- ClipboardComponent: In-memory clipboard for copy/paste operations
- DataProcessorComponent: CSV/JSON data manipulation and analysis
- HTTPClientComponent: HTTP requests (GET, POST, PUT, DELETE)
- MathUtilsComponent: Mathematical calculations and statistics
- TextUtilsComponent: Text processing (regex, diff, encoding, hashing)

All components follow the forge component pattern with:

- CommandProvider for exposing commands
- DirectiveProvider for resources/best practices
- Comprehensive parameter validation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
from forge.components.archive_handler.archive_handler import (
|
||||
ArchiveHandlerComponent,
|
||||
ArchiveHandlerConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["ArchiveHandlerComponent", "ArchiveHandlerConfiguration"]
|
||||
@@ -0,0 +1,383 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tarfile
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Iterator, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.file_storage.base import FileStorage
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import CommandExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ArchiveHandlerConfiguration(BaseModel):
    """Size and count limits for the archive handler component."""

    # Upper bound on the size of an archive file; defaults to 100MB.
    max_archive_size: int = Field(
        description="Maximum archive size in bytes",
        default=100 * 1024 * 1024,
    )
    # Upper bound on the combined size of extracted content; defaults to 500MB.
    max_extracted_size: int = Field(
        description="Maximum total size of extracted files",
        default=500 * 1024 * 1024,
    )
    # Upper bound on the number of entries in a single archive.
    max_files: int = Field(
        description="Maximum number of files in archive",
        default=10000,
    )
|
||||
|
||||
|
||||
class ArchiveHandlerComponent(
    DirectiveProvider,
    CommandProvider,
    ConfigurableComponent[ArchiveHandlerConfiguration],
):
    """Provides commands to create, extract, and list archive files.

    The archive format (zip, tar, tar.gz/.tgz, tar.bz2/.tbz2) is chosen from the
    archive's file name. Paths are resolved through the workspace storage.
    """

    config_class = ArchiveHandlerConfiguration

    def __init__(
        self,
        workspace: FileStorage,
        config: Optional[ArchiveHandlerConfiguration] = None,
    ):
        """Keep a reference to the workspace and set up the configuration.

        Args:
            workspace: Storage used to resolve and check every path.
            config: Optional limits; passed through to ConfigurableComponent.
        """
        ConfigurableComponent.__init__(self, config)
        self.workspace = workspace

    def get_resources(self) -> Iterator[str]:
        """Yield the resource description contributed by this component."""
        yield "Ability to create and extract zip/tar archives."

    def get_commands(self) -> Iterator[Command]:
        """Yield the commands exposed by this component."""
        yield self.create_archive
        yield self.extract_archive
        yield self.list_archive

    def _get_archive_type(self, path: str) -> str:
        """Determine archive type from filename.

        Returns:
            One of "zip", "tar.gz", "tar.bz2", "tar", or "unknown". The match
            is case-insensitive and based on the file name suffix only.
        """
        path_lower = path.lower()
        if path_lower.endswith(".zip"):
            return "zip"
        if path_lower.endswith((".tar.gz", ".tgz")):
            return "tar.gz"
        if path_lower.endswith((".tar.bz2", ".tbz2")):
            return "tar.bz2"
        if path_lower.endswith(".tar"):
            return "tar"
        return "unknown"

    @staticmethod
    def _tar_mode(archive_type: str, action: str) -> str:
        """Build a tarfile mode such as "r", "w:gz" or "r:bz2"."""
        # "tar.gz" -> "gz", "tar.bz2" -> "bz2", plain "tar" -> "".
        compression = archive_type.partition(".")[2]
        return f"{action}:{compression}" if compression else action

    @staticmethod
    def _iter_source_files(
        source: str, source_path: Path, exclude: Path
    ) -> Iterator[tuple[Path, str]]:
        """Yield (file, archive name) pairs for one source file or directory.

        ``exclude`` is the resolved path of the archive being written; it is
        skipped so an archive created inside a source directory is never
        added to (or counted as part of) itself.
        """
        if source_path.is_file():
            candidates = [(source_path, source)]
        elif source_path.is_dir():
            candidates = (
                (file, str(Path(source) / file.relative_to(source_path)))
                for file in source_path.rglob("*")
                if file.is_file()
            )
        else:
            candidates = []

        for file, arcname in candidates:
            if file.resolve() != exclude:
                yield file, arcname

    def _check_extraction_limits(
        self, dest_root: Path, entries: list[tuple[str, int]]
    ) -> None:
        """Validate archive entries before anything is written to disk.

        Args:
            dest_root: Resolved destination directory.
            entries: (member name, uncompressed size) for every archive member.

        Raises:
            CommandExecutionError: If there are more entries than
                ``max_files``, a member would land outside ``dest_root``, or
                the total size exceeds ``max_extracted_size``.
        """
        if len(entries) > self.config.max_files:
            raise CommandExecutionError(
                f"Archive contains too many entries: {len(entries)} "
                f"(max: {self.config.max_files})"
            )

        for name, _ in entries:
            # Compare path components, not string prefixes: "/ws/out_evil"
            # starts with "/ws/out" but is not inside it.
            if not (dest_root / name).resolve().is_relative_to(dest_root):
                raise CommandExecutionError(
                    f"Unsafe archive: path '{name}' would extract outside destination"
                )

        total_size = sum(size for _, size in entries)
        if total_size > self.config.max_extracted_size:
            raise CommandExecutionError(
                f"Archive content too large: {total_size} bytes "
                f"(max: {self.config.max_extracted_size})"
            )

    @command(
        ["create_archive", "zip_files", "compress"],
        "Create a zip or tar archive from files or directories.",
        {
            "output_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description=(
                    "Path for the output archive "
                    "(e.g., 'backup.zip', 'files.tar.gz')"
                ),
                required=True,
            ),
            "source_paths": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="List of files or directories to archive",
                required=True,
            ),
        },
    )
    def create_archive(self, output_path: str, source_paths: list[str]) -> str:
        """Create an archive from specified files/directories.

        Args:
            output_path: Path for the output archive; its suffix selects the
                format.
            source_paths: List of files/directories to include. Directories
                are added recursively.

        Returns:
            str: JSON with the archive path, type, number of files added,
            original and archive sizes, and the compression ratio.

        Raises:
            CommandExecutionError: If the format is unsupported, a source path
                does not exist, or writing the archive fails.
        """
        archive_type = self._get_archive_type(output_path)
        if archive_type == "unknown":
            raise CommandExecutionError(
                "Unsupported archive format. Use .zip, .tar, .tar.gz, or .tar.bz2"
            )

        # Validate source paths exist
        for path in source_paths:
            if not self.workspace.exists(path):
                raise CommandExecutionError(f"Source path '{path}' does not exist")

        full_output = self.workspace.get_path(output_path)
        resolved_output = full_output.resolve()

        # Create parent directory if needed
        if directory := os.path.dirname(output_path):
            self.workspace.make_dir(directory)

        file_count = 0
        total_size = 0

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_output, "w", zipfile.ZIP_DEFLATED) as zf:
                    for source in source_paths:
                        source_path = self.workspace.get_path(source)
                        for file, arcname in self._iter_source_files(
                            source, source_path, resolved_output
                        ):
                            zf.write(file, arcname)
                            file_count += 1
                            total_size += file.stat().st_size
            else:
                mode = self._tar_mode(archive_type, "w")
                with tarfile.open(full_output, mode) as tf:
                    for source in source_paths:
                        source_path = self.workspace.get_path(source)
                        # tarfile adds directories recursively by itself; the
                        # walk below only produces the statistics.
                        tf.add(source_path, arcname=source)
                        for file, _ in self._iter_source_files(
                            source, source_path, resolved_output
                        ):
                            file_count += 1
                            total_size += file.stat().st_size

            archive_size = full_output.stat().st_size
        except Exception as e:
            raise CommandExecutionError(f"Failed to create archive: {e}") from e

        compression_ratio = (
            round((1 - archive_size / total_size) * 100, 1) if total_size > 0 else 0
        )

        return json.dumps(
            {
                "archive": output_path,
                "type": archive_type,
                "files_added": file_count,
                "original_size_bytes": total_size,
                "archive_size_bytes": archive_size,
                "compression_ratio": f"{compression_ratio}%",
            },
            indent=2,
        )

    @command(
        ["extract_archive", "unzip", "decompress"],
        "Extract files from a zip or tar archive.",
        {
            "archive_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the archive file",
                required=True,
            ),
            "destination": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Destination directory (default: current directory)",
                required=False,
            ),
            "members": JSONSchema(
                type=JSONSchema.Type.ARRAY,
                items=JSONSchema(type=JSONSchema.Type.STRING),
                description="Specific files to extract (default: all)",
                required=False,
            ),
        },
    )
    def extract_archive(
        self,
        archive_path: str,
        destination: str = ".",
        members: list[str] | None = None,
    ) -> str:
        """Extract files from an archive.

        Every member of the archive is validated (entry count, destination
        containment, total uncompressed size) before extraction starts, for
        zip and tar archives alike.

        Args:
            archive_path: Path to the archive
            destination: Directory to extract to
            members: Specific files to extract; all members when omitted

        Returns:
            str: JSON with the archive path, destination and extracted count

        Raises:
            CommandExecutionError: If the archive is missing, unsupported, too
                large, unsafe, corrupted, or extraction fails.
        """
        if not self.workspace.exists(archive_path):
            raise CommandExecutionError(f"Archive '{archive_path}' does not exist")

        archive_type = self._get_archive_type(archive_path)
        if archive_type == "unknown":
            # Fail before touching the filesystem.
            raise CommandExecutionError(f"Unsupported archive format: {archive_type}")

        full_archive = self.workspace.get_path(archive_path)
        full_dest = self.workspace.get_path(destination)

        # Check archive size
        archive_size = full_archive.stat().st_size
        if archive_size > self.config.max_archive_size:
            raise CommandExecutionError(
                f"Archive too large: {archive_size} bytes "
                f"(max: {self.config.max_archive_size})"
            )

        # Create destination directory
        self.workspace.make_dir(destination)
        dest_root = full_dest.resolve()

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_archive, "r") as zf:
                    infos = zf.infolist()
                    self._check_extraction_limits(
                        dest_root,
                        [(info.filename, info.file_size) for info in infos],
                    )
                    if members:
                        for member in members:
                            zf.extract(member, full_dest)
                        extracted_count = len(members)
                    else:
                        zf.extractall(full_dest)
                        extracted_count = len(infos)
            else:
                mode = self._tar_mode(archive_type, "r")
                with tarfile.open(full_archive, mode) as tf:
                    tar_members = tf.getmembers()
                    self._check_extraction_limits(
                        dest_root,
                        [(member.name, member.size) for member in tar_members],
                    )
                    # Use the "data" extraction filter where the running
                    # Python provides it, so links and special files are
                    # vetted as well; older interpreters keep the old call.
                    extract_kwargs = (
                        {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
                    )
                    if members:
                        for member in members:
                            tf.extract(member, full_dest, **extract_kwargs)
                        extracted_count = len(members)
                    else:
                        tf.extractall(full_dest, **extract_kwargs)
                        extracted_count = len(tar_members)
        except CommandExecutionError:
            # Our own validation errors already carry the right message.
            raise
        except (zipfile.BadZipFile, tarfile.TarError) as e:
            raise CommandExecutionError(f"Invalid or corrupted archive: {e}") from e
        except Exception as e:
            raise CommandExecutionError(f"Extraction failed: {e}") from e

        return json.dumps(
            {
                "archive": archive_path,
                "destination": destination,
                "files_extracted": extracted_count,
            },
            indent=2,
        )

    @command(
        ["list_archive", "archive_contents"],
        "List the contents of an archive without extracting.",
        {
            "archive_path": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Path to the archive file",
                required=True,
            ),
        },
    )
    def list_archive(self, archive_path: str) -> str:
        """List contents of an archive.

        Args:
            archive_path: Path to the archive

        Returns:
            str: JSON with the archive type, entry count, total uncompressed
            size and one record per member

        Raises:
            CommandExecutionError: If the archive is missing, has an
                unsupported format, or is invalid/corrupted.
        """
        if not self.workspace.exists(archive_path):
            raise CommandExecutionError(f"Archive '{archive_path}' does not exist")

        archive_type = self._get_archive_type(archive_path)
        if archive_type == "unknown":
            raise CommandExecutionError(f"Unsupported archive format: {archive_type}")

        full_archive = self.workspace.get_path(archive_path)

        try:
            if archive_type == "zip":
                with zipfile.ZipFile(full_archive, "r") as zf:
                    contents = [
                        {
                            "name": info.filename,
                            "size": info.file_size,
                            "compressed_size": info.compress_size,
                            "is_dir": info.is_dir(),
                        }
                        for info in zf.infolist()
                    ]
            else:
                mode = self._tar_mode(archive_type, "r")
                with tarfile.open(full_archive, mode) as tf:
                    contents = [
                        {
                            "name": member.name,
                            "size": member.size,
                            "is_dir": member.isdir(),
                        }
                        for member in tf.getmembers()
                    ]
        except (zipfile.BadZipFile, tarfile.TarError) as e:
            raise CommandExecutionError(f"Invalid or corrupted archive: {e}") from e

        total_size = sum(item.get("size", 0) for item in contents)

        return json.dumps(
            {
                "archive": archive_path,
                "type": archive_type,
                "file_count": len(contents),
                "total_size_bytes": total_size,
                "contents": contents,
            },
            indent=2,
        )
|
||||
6
classic/forge/forge/components/clipboard/__init__.py
Normal file
6
classic/forge/forge/components/clipboard/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from forge.components.clipboard.clipboard import (
|
||||
ClipboardComponent,
|
||||
ClipboardConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["ClipboardComponent", "ClipboardConfiguration"]
|
||||
197
classic/forge/forge/components/clipboard/clipboard.py
Normal file
197
classic/forge/forge/components/clipboard/clipboard.py
Normal file
@@ -0,0 +1,197 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import CommandExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClipboardConfiguration(BaseModel):
    """Capacity limits for the in-memory clipboard."""

    # How many distinct keys may be stored at once.
    max_items: int = Field(
        description="Maximum number of clipboard items to store",
        default=100,
    )
    # Largest allowed value, in bytes; defaults to 1MB.
    max_value_size: int = Field(
        description="Maximum size of a single clipboard value in bytes",
        default=1024 * 1024,
    )
|
||||
|
||||
|
||||
class ClipboardComponent(
    DirectiveProvider, CommandProvider, ConfigurableComponent[ClipboardConfiguration]
):
    """In-memory key/value clipboard for passing data between commands."""

    config_class = ClipboardConfiguration

    def __init__(self, config: Optional[ClipboardConfiguration] = None):
        """Set up the configuration and start with an empty clipboard."""
        ConfigurableComponent.__init__(self, config)
        self._storage: dict[str, Any] = {}

    def get_resources(self) -> Iterator[str]:
        """Yield the resource description contributed by this component."""
        yield "In-memory clipboard for storing temporary data."

    def get_commands(self) -> Iterator[Command]:
        """Yield the clipboard commands in their published order."""
        yield from (
            self.clipboard_copy,
            self.clipboard_paste,
            self.clipboard_list,
            self.clipboard_clear,
        )

    @command(
        ["clipboard_copy", "store", "remember"],
        "Store a value in the clipboard with a key for later retrieval.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="A unique key to identify this data",
                required=True,
            ),
            "value": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The value to store (can be any string, including JSON)",
                required=True,
            ),
        },
    )
    def clipboard_copy(self, key: str, value: str) -> str:
        """Save ``value`` under ``key``, replacing any previous value.

        Args:
            key: Non-empty name for the entry
            value: Text to remember

        Returns:
            str: JSON describing whether the entry was stored or updated

        Raises:
            CommandExecutionError: If the key is empty, the UTF-8 encoded value
                exceeds ``max_value_size``, or a new key would exceed
                ``max_items``.
        """
        if not key:
            raise CommandExecutionError("Key cannot be empty")

        # The size limit is expressed in bytes, so measure the encoded value.
        encoded_size = len(value.encode("utf-8"))
        limit = self.config.max_value_size
        if encoded_size > limit:
            raise CommandExecutionError(
                f"Value too large: {encoded_size} bytes (max: {limit})"
            )

        # Overwriting an existing key never counts against the item limit.
        already_present = key in self._storage
        if not already_present and len(self._storage) >= self.config.max_items:
            raise CommandExecutionError(
                f"Clipboard full: max {self.config.max_items} items. "
                "Use clipboard_clear to remove items."
            )

        self._storage[key] = value

        verb = "Updated" if already_present else "Stored"
        summary = {
            "action": verb.lower(),
            "key": key,
            "value_length": len(value),
            "message": f"{verb} value under key '{key}'",
        }
        return json.dumps(summary)

    @command(
        ["clipboard_paste", "retrieve", "recall"],
        "Retrieve a value from the clipboard by its key.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="The key of the value to retrieve",
                required=True,
            ),
        },
    )
    def clipboard_paste(self, key: str) -> str:
        """Look up the value stored under ``key``.

        Args:
            key: Name of the entry to read

        Returns:
            str: JSON with the key, its value and ``found: true``

        Raises:
            CommandExecutionError: If the key is unknown; the message lists up
                to ten of the keys that do exist.
        """
        if key in self._storage:
            return json.dumps({"key": key, "value": self._storage[key], "found": True})

        known_keys = list(self._storage.keys())[:10]
        raise CommandExecutionError(
            f"Key '{key}' not found in clipboard. "
            f"Available keys: {known_keys or '(empty)'}"
        )

    @command(
        ["clipboard_list", "list_stored"],
        "List all keys stored in the clipboard with their value lengths.",
        {},
    )
    def clipboard_list(self) -> str:
        """Summarise every clipboard entry.

        Returns:
            str: JSON with the entry count, the configured maximum and, per
            entry, its key, value length and a preview of at most 50 characters
        """

        def describe(name: str, stored: Any) -> dict[str, Any]:
            text = str(stored)
            ellipsis = "..." if len(text) > 50 else ""
            return {
                "key": name,
                "value_length": len(text),
                "value_preview": text[:50] + ellipsis,
            }

        entries = [describe(name, stored) for name, stored in self._storage.items()]
        report = {
            "count": len(entries),
            "items": entries,
            "max_items": self.config.max_items,
        }
        return json.dumps(report, indent=2)

    @command(
        ["clipboard_clear", "forget"],
        "Clear one or all items from the clipboard.",
        {
            "key": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Specific key to clear (omit to clear all)",
                required=False,
            ),
        },
    )
    def clipboard_clear(self, key: str | None = None) -> str:
        """Remove one entry, or everything when no key is given.

        Args:
            key: Entry to delete; ``None`` empties the whole clipboard

        Returns:
            str: JSON confirming what was removed

        Raises:
            CommandExecutionError: If a key is given but not present.
        """
        if key is None:
            removed = len(self._storage)
            self._storage.clear()
            return json.dumps(
                {
                    "action": "cleared_all",
                    "items_removed": removed,
                    "message": f"Cleared {removed} item(s) from clipboard",
                }
            )

        if key not in self._storage:
            raise CommandExecutionError(f"Key '{key}' not found in clipboard")

        del self._storage[key]
        return json.dumps(
            {"action": "cleared", "key": key, "message": f"Removed key '{key}'"}
        )
|
||||
@@ -0,0 +1,6 @@
|
||||
from forge.components.data_processor.data_processor import (
|
||||
DataProcessorComponent,
|
||||
DataProcessorConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["DataProcessorComponent", "DataProcessorConfiguration"]
|
||||
476
classic/forge/forge/components/data_processor/data_processor.py
Normal file
476
classic/forge/forge/components/data_processor/data_processor.py
Normal file
@@ -0,0 +1,476 @@
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Iterator, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import DataProcessingError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DataProcessorConfiguration(BaseModel):
    """Limits for the JSON/CSV data processor component."""

    # Nesting depth limit for JSON input.
    max_json_depth: int = Field(
        description="Maximum nesting depth for JSON parsing",
        default=10,
    )
    # Row limit for CSV input.
    max_csv_rows: int = Field(
        description="Maximum rows to process in CSV operations",
        default=10000,
    )
|
||||
|
||||
|
||||
class DataProcessorComponent(
|
||||
DirectiveProvider,
|
||||
CommandProvider,
|
||||
ConfigurableComponent[DataProcessorConfiguration],
|
||||
):
|
||||
"""Provides commands to parse, transform, and query structured data."""
|
||||
|
||||
config_class = DataProcessorConfiguration
|
||||
|
||||
def __init__(self, config: Optional[DataProcessorConfiguration] = None):
    """Pass ``config`` through to ``ConfigurableComponent.__init__``."""
    ConfigurableComponent.__init__(self, config)
|
||||
|
||||
def get_resources(self) -> Iterator[str]:
    """Yield the resource description contributed by this component."""
    yield "Ability to parse and manipulate JSON and CSV data."
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
    """Yield the JSON commands first, then the CSV commands."""
    yield from (
        self.parse_json,
        self.format_json,
        self.query_json,
        self.parse_csv,
        self.filter_csv,
        self.aggregate_csv,
    )
|
||||
|
||||
@command(
    ["parse_json", "validate_json"],
    "Parse and validate a JSON string, returning a structured representation.",
    {
        "json_string": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The JSON string to parse",
            required=True,
        ),
    },
)
def parse_json(self, json_string: str) -> str:
    """Validate a JSON document and describe its top-level value.

    Args:
        json_string: The JSON text to check

    Returns:
        str: JSON report. For valid input it holds ``valid``, the Python type
        name of the top-level value, the data itself, plus ``length`` for
        lists or ``keys`` for objects. For invalid input it holds ``valid``
        (false), the error text and its line and column.
    """
    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError as err:
        # Invalid input is reported in the result rather than raised.
        failure = {
            "valid": False,
            "error": str(err),
            "line": err.lineno,
            "column": err.colno,
        }
        return json.dumps(failure, indent=2)

    report = {"valid": True, "type": type(parsed).__name__, "data": parsed}
    if isinstance(parsed, list):
        report["length"] = len(parsed)
    elif isinstance(parsed, dict):
        report["keys"] = list(parsed)

    return json.dumps(report, indent=2)
|
||||
|
||||
@command(
    ["format_json", "pretty_print_json"],
    "Format JSON with proper indentation for readability.",
    {
        "json_string": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The JSON string to format",
            required=True,
        ),
        "indent": JSONSchema(
            type=JSONSchema.Type.INTEGER,
            description="Number of spaces for indentation (default: 2)",
            minimum=0,
            maximum=8,
            required=False,
        ),
    },
)
def format_json(self, json_string: str, indent: int = 2) -> str:
    """Re-serialise a JSON document with the requested indentation.

    Args:
        json_string: The JSON text to reformat
        indent: Spaces per nesting level

    Returns:
        str: The reformatted JSON, with non-ASCII characters left unescaped

    Raises:
        DataProcessingError: If ``json_string`` is not valid JSON.
    """
    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError as e:
        raise DataProcessingError(f"Invalid JSON: {e}")

    return json.dumps(parsed, indent=indent, ensure_ascii=False)
|
||||
|
||||
def _query_path(self, data: Any, path: str) -> Any:
|
||||
"""Query JSON data using a dot-notation path with array support.
|
||||
|
||||
Args:
|
||||
data: The data to query
|
||||
path: Path like "users[0].name" or "config.settings.enabled"
|
||||
|
||||
Returns:
|
||||
The value at the path
|
||||
"""
|
||||
import re
|
||||
|
||||
if not path:
|
||||
return data
|
||||
|
||||
# Split path into segments, handling array notation
|
||||
segments = []
|
||||
for part in path.split("."):
|
||||
# Handle array notation like "users[0]"
|
||||
array_match = re.match(r"^(\w+)\[(\d+)\]$", part)
|
||||
if array_match:
|
||||
segments.append(array_match.group(1))
|
||||
segments.append(int(array_match.group(2)))
|
||||
elif part.isdigit():
|
||||
segments.append(int(part))
|
||||
else:
|
||||
segments.append(part)
|
||||
|
||||
result = data
|
||||
for segment in segments:
|
||||
try:
|
||||
if isinstance(segment, int):
|
||||
result = result[segment]
|
||||
elif isinstance(result, dict):
|
||||
result = result[segment]
|
||||
elif isinstance(result, list) and segment.isdigit():
|
||||
result = result[int(segment)]
|
||||
else:
|
||||
raise DataProcessingError(
|
||||
f"Cannot access '{segment}' on {type(result).__name__}"
|
||||
)
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
raise DataProcessingError(f"Path query failed at '{segment}': {e}")
|
||||
|
||||
return result
|
||||
|
||||
@command(
    ["query_json", "json_path"],
    "Query JSON data using a dot-notation path (e.g., 'users[0].name').",
    {
        "json_string": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The JSON string to query",
            required=True,
        ),
        "path": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Path to query (e.g., 'data.users[0].email')",
            required=True,
        ),
    },
)
def query_json(self, json_string: str, path: str) -> str:
    """Extract one value from a JSON document by dot-notation path.

    Args:
        json_string: The JSON text to search
        path: Location of the wanted value

    Returns:
        str: The selected value, serialised as JSON

    Raises:
        DataProcessingError: If ``json_string`` is not valid JSON.
    """
    try:
        parsed = json.loads(json_string)
    except json.JSONDecodeError as e:
        raise DataProcessingError(f"Invalid JSON: {e}")

    selected = self._query_path(parsed, path)
    return json.dumps(selected, indent=2)
|
||||
|
||||
@command(
    ["parse_csv", "csv_to_json"],
    "Parse CSV string into JSON array of objects.",
    {
        "csv_string": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The CSV string to parse",
            required=True,
        ),
        "has_header": JSONSchema(
            type=JSONSchema.Type.BOOLEAN,
            description="Whether the first row is a header (default: True)",
            required=False,
        ),
        "delimiter": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Field delimiter (default: ',')",
            required=False,
        ),
    },
)
def parse_csv(
    self, csv_string: str, has_header: bool = True, delimiter: str = ","
) -> str:
    """Parse CSV string into JSON.

    Args:
        csv_string: The CSV string to parse
        has_header: Whether first row is header
        delimiter: Field delimiter

    Returns:
        str: JSON array of objects (with a header) or of arrays (without).
        With a header, each row is paired with the header names by position;
        cells beyond the shorter of the two are dropped.

    Raises:
        DataProcessingError: If the delimiter is rejected by the csv module,
            the input has more than ``max_csv_rows`` rows (the header row
            counts), or the CSV cannot be parsed.
    """
    try:
        reader = csv.reader(io.StringIO(csv_string), delimiter=delimiter)
    except (TypeError, ValueError) as e:
        # csv.reader rejects bad delimiters with these exception types.
        raise DataProcessingError(
            f"Invalid CSV delimiter {delimiter!r}: {e}"
        ) from e

    max_rows = self.config.max_csv_rows
    rows: list[list[str]] = []
    try:
        for row in reader:
            # Stop at the first row over the limit instead of reading all
            # of an oversized input first.
            if len(rows) >= max_rows:
                raise DataProcessingError(
                    f"CSV exceeds maximum of {max_rows} rows"
                )
            rows.append(row)
    except csv.Error as e:
        raise DataProcessingError(f"CSV parsing error: {e}") from e

    if not rows:
        return json.dumps([])

    if has_header:
        headers = rows[0]
        data = [dict(zip(headers, row)) for row in rows[1:]]
    else:
        data = rows

    return json.dumps(data, indent=2)
|
||||
|
||||
@command(
    ["filter_csv", "csv_filter"],
    "Filter CSV rows based on a column condition.",
    {
        "csv_string": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The CSV string to filter",
            required=True,
        ),
        "column": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Column name or index to filter on",
            required=True,
        ),
        "operator": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Comparison operator (eq, ne, gt, lt, gte, lte, contains)",
            required=True,
        ),
        "value": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Value to compare against",
            required=True,
        ),
    },
)
def filter_csv(
    self,
    csv_string: str,
    column: str,
    operator: Literal["eq", "ne", "gt", "lt", "gte", "lte", "contains"],
    value: str,
) -> str:
    """Filter CSV rows based on a column condition.

    The CSV is parsed with its first row as the header. ``column`` is looked
    up by header name first. If no row has a column of that name and
    ``column`` is a non-negative integer, it is used as a zero-based
    position instead.

    Comparisons are numeric when both the cell and ``value`` parse as
    numbers, otherwise case-insensitive string comparisons. ``contains`` is
    always a case-insensitive substring test.

    Args:
        csv_string: The CSV string to filter
        column: Column name or index
        operator: Comparison operator
        value: Value to compare against

    Returns:
        str: Matching rows as a JSON array of objects

    Raises:
        DataProcessingError: If ``operator`` is not one of the supported
            operators, or the CSV cannot be parsed.
    """
    comparators = {
        "eq": lambda left, right: left == right,
        "ne": lambda left, right: left != right,
        "gt": lambda left, right: left > right,
        "lt": lambda left, right: left < right,
        "gte": lambda left, right: left >= right,
        "lte": lambda left, right: left <= right,
    }
    if operator != "contains" and operator not in comparators:
        # An unknown operator used to yield an empty result silently.
        raise DataProcessingError(
            f"Unsupported operator '{operator}'. "
            "Use eq, ne, gt, lt, gte, lte, or contains"
        )

    rows = json.loads(self.parse_csv(csv_string))
    if not rows:
        return json.dumps([])

    # Parse the comparison value once rather than once per row.
    try:
        target_number = float(value)
    except (TypeError, ValueError):
        target_number = None
    target_text = str(value).lower()

    def matches(cell: Any) -> bool:
        if operator != "contains" and target_number is not None:
            try:
                return comparators[operator](float(cell), target_number)
            except (TypeError, ValueError):
                pass  # cell is not numeric: fall back to text comparison
        cell_text = str(cell).lower()
        if operator == "contains":
            return target_text in cell_text
        return comparators[operator](cell_text, target_text)

    # Header names win; a numeric column is a position only when no row has
    # a column of that name.
    by_name = any(column in row for row in rows)
    position = int(column) if column.isdecimal() else None

    filtered = []
    for row in rows:
        if by_name:
            if column in row and matches(row[column]):
                filtered.append(row)
        elif position is not None:
            cells = list(row.values())
            if position < len(cells) and matches(cells[position]):
                filtered.append(row)

    return json.dumps(filtered, indent=2)
|
||||
|
||||
@command(
|
||||
["aggregate_csv", "csv_aggregate"],
|
||||
"Aggregate data in a CSV column (sum, avg, min, max, count).",
|
||||
{
|
||||
"csv_string": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The CSV string to aggregate",
|
||||
required=True,
|
||||
),
|
||||
"column": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Column name to aggregate",
|
||||
required=True,
|
||||
),
|
||||
"operation": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Aggregation operation (sum, avg, min, max, count)",
|
||||
required=True,
|
||||
),
|
||||
"group_by": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Optional column to group by",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def aggregate_csv(
|
||||
self,
|
||||
csv_string: str,
|
||||
column: str,
|
||||
operation: Literal["sum", "avg", "min", "max", "count"],
|
||||
group_by: str | None = None,
|
||||
) -> str:
|
||||
"""Aggregate data in a CSV column.
|
||||
|
||||
Args:
|
||||
csv_string: The CSV string to aggregate
|
||||
column: Column name to aggregate
|
||||
operation: Aggregation operation
|
||||
group_by: Optional grouping column
|
||||
|
||||
Returns:
|
||||
str: Aggregation result as JSON
|
||||
"""
|
||||
data = json.loads(self.parse_csv(csv_string))
|
||||
|
||||
if not data:
|
||||
return json.dumps({"result": None, "error": "No data"})
|
||||
|
||||
def aggregate(values: list) -> float | int | None:
|
||||
# Filter to numeric values
|
||||
numeric = []
|
||||
for v in values:
|
||||
try:
|
||||
numeric.append(float(v))
|
||||
except (ValueError, TypeError):
|
||||
continue
|
||||
|
||||
if not numeric:
|
||||
if operation == "count":
|
||||
return len(values)
|
||||
return None
|
||||
|
||||
if operation == "sum":
|
||||
return sum(numeric)
|
||||
elif operation == "avg":
|
||||
return sum(numeric) / len(numeric)
|
||||
elif operation == "min":
|
||||
return min(numeric)
|
||||
elif operation == "max":
|
||||
return max(numeric)
|
||||
elif operation == "count":
|
||||
return len(values)
|
||||
return None
|
||||
|
||||
if group_by:
|
||||
# Group by operation
|
||||
groups: dict[str, list] = {}
|
||||
for row in data:
|
||||
if isinstance(row, dict):
|
||||
key = str(row.get(group_by, ""))
|
||||
value = row.get(column)
|
||||
if key not in groups:
|
||||
groups[key] = []
|
||||
groups[key].append(value)
|
||||
|
||||
result = {key: aggregate(values) for key, values in groups.items()}
|
||||
return json.dumps({"grouped_by": group_by, "results": result}, indent=2)
|
||||
else:
|
||||
# Simple aggregation
|
||||
values = []
|
||||
for row in data:
|
||||
if isinstance(row, dict):
|
||||
values.append(row.get(column))
|
||||
|
||||
return json.dumps(
|
||||
{"column": column, "operation": operation, "result": aggregate(values)},
|
||||
indent=2,
|
||||
)
|
||||
6
classic/forge/forge/components/http_client/__init__.py
Normal file
6
classic/forge/forge/components/http_client/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from forge.components.http_client.http_client import (
|
||||
HTTPClientComponent,
|
||||
HTTPClientConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["HTTPClientComponent", "HTTPClientConfiguration"]
|
||||
354
classic/forge/forge/components/http_client/http_client.py
Normal file
354
classic/forge/forge/components/http_client/http_client.py
Normal file
@@ -0,0 +1,354 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import HTTPError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HTTPClientConfiguration(BaseModel):
    # Settings for HTTPClientComponent.

    # Fallback timeout used when a command does not pass its own.
    default_timeout: int = Field(
        description="Default timeout in seconds for HTTP requests",
        default=30,
    )
    # NOTE(review): not referenced by the request code in this file section;
    # confirm whether retries are implemented elsewhere.
    max_retries: int = Field(
        description="Maximum number of retries for failed requests",
        default=3,
    )
    # An empty list disables the domain restriction.
    allowed_domains: list[str] = Field(
        description="List of allowed domains (empty = all domains allowed)",
        default_factory=list,
    )
    # Installed as the session-wide User-Agent header.
    user_agent: str = Field(
        description="User agent string for requests",
        default="AutoGPT-HTTPClient/1.0",
    )
    # Responses whose body exceeds this many bytes are rejected.
    max_response_size: int = Field(
        description="Maximum response size in bytes",
        default=1024 * 1024,  # 1MB
    )
|
||||
|
||||
|
||||
class HTTPClientComponent(
|
||||
DirectiveProvider, CommandProvider, ConfigurableComponent[HTTPClientConfiguration]
|
||||
):
|
||||
"""Provides commands to make HTTP requests."""
|
||||
|
||||
config_class = HTTPClientConfiguration
|
||||
|
||||
def __init__(self, config: Optional[HTTPClientConfiguration] = None):
    """Set up the component and its shared HTTP session.

    Args:
        config: Optional configuration, handed to ConfigurableComponent
    """
    ConfigurableComponent.__init__(self, config)
    # One session is reused for every request so the configured
    # User-Agent header is sent with each of them.
    self.session = requests.Session()
    default_headers = {"User-Agent": self.config.user_agent}
    self.session.headers.update(default_headers)
|
||||
|
||||
def get_resources(self) -> Iterator[str]:
    """Yield the resource description advertised by this component."""
    yield from ("Ability to make HTTP requests to external APIs.",)
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
    """Yield the HTTP commands exposed by this component, in order."""
    for handler in (
        self.http_get,
        self.http_post,
        self.http_put,
        self.http_delete,
    ):
        yield handler
|
||||
|
||||
def _is_domain_allowed(self, url: str) -> bool:
    """Check whether the URL's host is covered by the allowed-domain list.

    An empty ``allowed_domains`` list allows every URL. Otherwise the URL
    is accepted when its host equals an allowed entry or is a subdomain of
    one. Matching is case-insensitive.

    Args:
        url: The URL about to be requested

    Returns:
        bool: True if the request may proceed
    """
    allowed_domains = self.config.allowed_domains
    if not allowed_domains:
        return True

    from urllib.parse import urlparse

    parsed = urlparse(url)
    # `hostname` strips any "user:pass@" prefix and ":port" suffix, so
    # "https://api.example.com:8443/" is matched on "api.example.com".
    # A trailing dot (fully-qualified form) is ignored.
    host = (parsed.hostname or "").lower().rstrip(".")
    # The raw netloc is still tried so that entries written with an
    # explicit port (e.g. "example.com:8080") keep matching as before.
    candidates = [c for c in (host, parsed.netloc.lower()) if c]

    for allowed in allowed_domains:
        entry = allowed.lower()
        for candidate in candidates:
            if candidate == entry or candidate.endswith("." + entry):
                return True
    return False
|
||||
|
||||
def _make_request(
|
||||
self,
|
||||
method: str,
|
||||
url: str,
|
||||
headers: dict[str, str] | None = None,
|
||||
params: dict[str, Any] | None = None,
|
||||
body: dict[str, Any] | str | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Make an HTTP request and return a structured response.
|
||||
|
||||
Args:
|
||||
method: HTTP method (GET, POST, PUT, DELETE)
|
||||
url: The URL to request
|
||||
headers: Optional headers
|
||||
params: Optional query parameters
|
||||
body: Optional request body
|
||||
timeout: Optional timeout override
|
||||
|
||||
Returns:
|
||||
dict: Structured response with status, headers, and body
|
||||
"""
|
||||
if not self._is_domain_allowed(url):
|
||||
raise HTTPError(
|
||||
f"Domain not in allowed list. Allowed: {self.config.allowed_domains}",
|
||||
url=url,
|
||||
)
|
||||
|
||||
request_timeout = timeout or self.config.default_timeout
|
||||
request_headers = headers or {}
|
||||
|
||||
try:
|
||||
if method == "GET":
|
||||
response = self.session.get(
|
||||
url, headers=request_headers, params=params, timeout=request_timeout
|
||||
)
|
||||
elif method == "POST":
|
||||
response = self.session.post(
|
||||
url,
|
||||
headers=request_headers,
|
||||
params=params,
|
||||
json=body if isinstance(body, dict) else None,
|
||||
data=body if isinstance(body, str) else None,
|
||||
timeout=request_timeout,
|
||||
)
|
||||
elif method == "PUT":
|
||||
response = self.session.put(
|
||||
url,
|
||||
headers=request_headers,
|
||||
params=params,
|
||||
json=body if isinstance(body, dict) else None,
|
||||
data=body if isinstance(body, str) else None,
|
||||
timeout=request_timeout,
|
||||
)
|
||||
elif method == "DELETE":
|
||||
response = self.session.delete(
|
||||
url, headers=request_headers, params=params, timeout=request_timeout
|
||||
)
|
||||
else:
|
||||
raise HTTPError(f"Unsupported HTTP method: {method}", url=url)
|
||||
|
||||
# Check response size
|
||||
content_length = len(response.content)
|
||||
if content_length > self.config.max_response_size:
|
||||
raise HTTPError(
|
||||
f"Response too large: {content_length} bytes "
|
||||
f"(max: {self.config.max_response_size})",
|
||||
status_code=response.status_code,
|
||||
url=url,
|
||||
)
|
||||
|
||||
# Try to parse as JSON, fall back to text
|
||||
try:
|
||||
response_body = response.json()
|
||||
except json.JSONDecodeError:
|
||||
response_body = response.text
|
||||
|
||||
return {
|
||||
"status_code": response.status_code,
|
||||
"headers": dict(response.headers),
|
||||
"body": response_body,
|
||||
"url": response.url,
|
||||
}
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
raise HTTPError(
|
||||
f"Request timed out after {request_timeout} seconds", url=url
|
||||
)
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
raise HTTPError(f"Connection error: {e}", url=url)
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise HTTPError(f"Request failed: {e}", url=url)
|
||||
|
||||
@command(
|
||||
["http_get", "get_request"],
|
||||
"Make an HTTP GET request to retrieve data from a URL.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL to fetch",
|
||||
required=True,
|
||||
),
|
||||
"headers": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Optional HTTP headers as key-value pairs",
|
||||
required=False,
|
||||
),
|
||||
"params": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Optional query parameters",
|
||||
required=False,
|
||||
),
|
||||
"timeout": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Timeout in seconds (default: 30)",
|
||||
minimum=1,
|
||||
maximum=300,
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def http_get(
|
||||
self,
|
||||
url: str,
|
||||
headers: dict[str, str] | None = None,
|
||||
params: dict[str, Any] | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> str:
|
||||
"""Make an HTTP GET request.
|
||||
|
||||
Args:
|
||||
url: The URL to request
|
||||
headers: Optional headers
|
||||
params: Optional query parameters
|
||||
timeout: Optional timeout
|
||||
|
||||
Returns:
|
||||
str: JSON-formatted response
|
||||
"""
|
||||
result = self._make_request("GET", url, headers, params, timeout=timeout)
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
@command(
|
||||
["http_post", "post_request"],
|
||||
"Make an HTTP POST request to send data to a URL.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL to post to",
|
||||
required=True,
|
||||
),
|
||||
"body": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="The request body (will be sent as JSON)",
|
||||
required=False,
|
||||
),
|
||||
"headers": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Optional HTTP headers",
|
||||
required=False,
|
||||
),
|
||||
"timeout": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Timeout in seconds (default: 30)",
|
||||
minimum=1,
|
||||
maximum=300,
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def http_post(
|
||||
self,
|
||||
url: str,
|
||||
body: dict[str, Any] | None = None,
|
||||
headers: dict[str, str] | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> str:
|
||||
"""Make an HTTP POST request.
|
||||
|
||||
Args:
|
||||
url: The URL to request
|
||||
body: Request body
|
||||
headers: Optional headers
|
||||
timeout: Optional timeout
|
||||
|
||||
Returns:
|
||||
str: JSON-formatted response
|
||||
"""
|
||||
result = self._make_request("POST", url, headers, body=body, timeout=timeout)
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
@command(
|
||||
["http_put", "put_request"],
|
||||
"Make an HTTP PUT request to update data at a URL.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL to put to",
|
||||
required=True,
|
||||
),
|
||||
"body": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="The request body (will be sent as JSON)",
|
||||
required=True,
|
||||
),
|
||||
"headers": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Optional HTTP headers",
|
||||
required=False,
|
||||
),
|
||||
"timeout": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Timeout in seconds (default: 30)",
|
||||
minimum=1,
|
||||
maximum=300,
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def http_put(
|
||||
self,
|
||||
url: str,
|
||||
body: dict[str, Any],
|
||||
headers: dict[str, str] | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> str:
|
||||
"""Make an HTTP PUT request.
|
||||
|
||||
Args:
|
||||
url: The URL to request
|
||||
body: Request body
|
||||
headers: Optional headers
|
||||
timeout: Optional timeout
|
||||
|
||||
Returns:
|
||||
str: JSON-formatted response
|
||||
"""
|
||||
result = self._make_request("PUT", url, headers, body=body, timeout=timeout)
|
||||
return json.dumps(result, indent=2)
|
||||
|
||||
@command(
|
||||
["http_delete", "delete_request"],
|
||||
"Make an HTTP DELETE request to remove a resource.",
|
||||
{
|
||||
"url": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The URL to delete",
|
||||
required=True,
|
||||
),
|
||||
"headers": JSONSchema(
|
||||
type=JSONSchema.Type.OBJECT,
|
||||
description="Optional HTTP headers",
|
||||
required=False,
|
||||
),
|
||||
"timeout": JSONSchema(
|
||||
type=JSONSchema.Type.INTEGER,
|
||||
description="Timeout in seconds (default: 30)",
|
||||
minimum=1,
|
||||
maximum=300,
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def http_delete(
|
||||
self,
|
||||
url: str,
|
||||
headers: dict[str, str] | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> str:
|
||||
"""Make an HTTP DELETE request.
|
||||
|
||||
Args:
|
||||
url: The URL to request
|
||||
headers: Optional headers
|
||||
timeout: Optional timeout
|
||||
|
||||
Returns:
|
||||
str: JSON-formatted response
|
||||
"""
|
||||
result = self._make_request("DELETE", url, headers, timeout=timeout)
|
||||
return json.dumps(result, indent=2)
|
||||
6
classic/forge/forge/components/math_utils/__init__.py
Normal file
6
classic/forge/forge/components/math_utils/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from forge.components.math_utils.math_utils import (
|
||||
MathUtilsComponent,
|
||||
MathUtilsConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["MathUtilsComponent", "MathUtilsConfiguration"]
|
||||
492
classic/forge/forge/components/math_utils/math_utils.py
Normal file
492
classic/forge/forge/components/math_utils/math_utils.py
Normal file
@@ -0,0 +1,492 @@
|
||||
import ast
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import operator
|
||||
import statistics
|
||||
from typing import Any, Iterator, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import CommandExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MathUtilsConfiguration(BaseModel):
    # No configuration needed for now; the model declares no fields.
    ...
|
||||
|
||||
|
||||
# Upper bound (in bits) on the estimated size of an integer power result.
_MAX_POW_RESULT_BITS = 100_000


def _bounded_pow(base, exponent, *modulus):
    """Like pow(), but refuse integer powers whose result would be enormous.

    Expressions such as ``9 ** 9 ** 9`` would otherwise keep the CPU busy and
    allocate huge integers. Float powers are left alone: they overflow with
    OverflowError on their own.
    """
    if (
        not modulus
        and isinstance(base, int)
        and isinstance(exponent, int)
        and abs(base) > 1
        and exponent > 0
        and base.bit_length() * exponent > _MAX_POW_RESULT_BITS
    ):
        raise OverflowError("integer power result too large")
    return pow(base, exponent, *modulus)


class SafeEvaluator(ast.NodeVisitor):
    """Safe evaluator for mathematical expressions.

    Walks an AST produced by ``ast.parse(..., mode="eval")`` and evaluates
    only whitelisted node types, operators, functions and constants. Every
    other node type is rejected by ``generic_visit``.
    """

    # Allowed operators
    OPERATORS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: _bounded_pow,
        ast.USub: operator.neg,
        ast.UAdd: operator.pos,
    }

    # Allowed functions
    FUNCTIONS = {
        "abs": abs,
        "round": round,
        "min": min,
        "max": max,
        "sum": sum,
        "sqrt": math.sqrt,
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "log": math.log,
        "log10": math.log10,
        "log2": math.log2,
        "exp": math.exp,
        "floor": math.floor,
        "ceil": math.ceil,
        "pow": _bounded_pow,
    }

    # Allowed constants
    CONSTANTS = {
        "pi": math.pi,
        "e": math.e,
        "inf": float("inf"),
    }

    def visit(self, node: ast.AST) -> float:
        """Evaluate ``node`` and return its value."""
        return super().visit(node)

    def generic_visit(self, node: ast.AST) -> float:
        """Reject every node type that has no dedicated visitor."""
        raise CommandExecutionError(
            f"Unsupported operation: {type(node).__name__}. "
            "Only basic arithmetic, math functions, and constants are allowed."
        )

    def visit_Expression(self, node: ast.Expression) -> float:
        return self.visit(node.body)

    def visit_Constant(self, node: ast.Constant) -> float:
        # Numeric literals only. bool is a subclass of int, so it has to be
        # excluded explicitly or `True` would evaluate as a number.
        value = node.value
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise CommandExecutionError(f"Invalid constant: {node.value}")
        return value

    # No visit_Num: ast.parse emits ast.Constant for literals, and ast.Num is
    # deprecated, so referencing it only produced warnings.

    def visit_Name(self, node: ast.Name) -> float:
        if node.id in self.CONSTANTS:
            return self.CONSTANTS[node.id]
        raise CommandExecutionError(
            f"Unknown variable: {node.id}. Available constants: {list(self.CONSTANTS.keys())}"
        )

    def visit_BinOp(self, node: ast.BinOp) -> float:
        if type(node.op) not in self.OPERATORS:
            raise CommandExecutionError(
                f"Unsupported operator: {type(node.op).__name__}"
            )
        left = self.visit(node.left)
        right = self.visit(node.right)
        return self.OPERATORS[type(node.op)](left, right)

    def visit_UnaryOp(self, node: ast.UnaryOp) -> float:
        if type(node.op) not in self.OPERATORS:
            raise CommandExecutionError(
                f"Unsupported unary operator: {type(node.op).__name__}"
            )
        operand = self.visit(node.operand)
        return self.OPERATORS[type(node.op)](operand)

    def visit_Call(self, node: ast.Call) -> float:
        if not isinstance(node.func, ast.Name):
            raise CommandExecutionError("Only direct function calls are allowed")

        func_name = node.func.id
        if func_name not in self.FUNCTIONS:
            raise CommandExecutionError(
                f"Unknown function: {func_name}. Available: {list(self.FUNCTIONS.keys())}"
            )

        args = [self.visit(arg) for arg in node.args]
        # Keyword arguments are evaluated like positional ones instead of
        # being dropped, so round(x, ndigits=2) behaves as written.
        kwargs = {}
        for keyword in node.keywords:
            if keyword.arg is None:  # f(**mapping)
                raise CommandExecutionError(
                    "Dictionary unpacking is not allowed in function calls"
                )
            kwargs[keyword.arg] = self.visit(keyword.value)
        return self.FUNCTIONS[func_name](*args, **kwargs)

    def visit_List(self, node: ast.List) -> list:
        return [self.visit(elt) for elt in node.elts]

    def visit_Tuple(self, node: ast.Tuple) -> tuple:
        return tuple(self.visit(elt) for elt in node.elts)
|
||||
|
||||
|
||||
class MathUtilsComponent(
|
||||
DirectiveProvider, CommandProvider, ConfigurableComponent[MathUtilsConfiguration]
|
||||
):
|
||||
"""Provides commands for mathematical calculations and statistics."""
|
||||
|
||||
config_class = MathUtilsConfiguration
|
||||
|
||||
def __init__(self, config: Optional[MathUtilsConfiguration] = None):
    """Initialise the component.

    Args:
        config: Optional configuration, passed unchanged to
            ``ConfigurableComponent.__init__``
    """
    ConfigurableComponent.__init__(self, config)
|
||||
|
||||
def get_resources(self) -> Iterator[str]:
    """Yield the resource description advertised by this component."""
    yield from (
        "Ability to perform mathematical calculations and statistical analysis.",
    )
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
    """Yield the math commands exposed by this component, in order."""
    for handler in (self.calculate, self.statistics_calc, self.convert_units):
        yield handler
|
||||
|
||||
@command(
|
||||
["calculate", "eval_math", "compute"],
|
||||
"Safely evaluate a mathematical expression. Supports +, -, *, /, //, %, ** operators and functions like sqrt, sin, cos, log.",
|
||||
{
|
||||
"expression": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Mathematical expression to evaluate (e.g., '2 * pi + sqrt(16)')",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def calculate(self, expression: str) -> str:
    """Safely evaluate a mathematical expression.

    The expression is parsed with ``ast.parse`` in eval mode and evaluated
    by ``SafeEvaluator``; it is never passed to ``eval``.

    Args:
        expression: The expression to evaluate

    Returns:
        str: JSON object with the original ``expression`` and its ``result``

    Raises:
        CommandExecutionError: If the expression is syntactically invalid,
            uses an unsupported construct, or fails during evaluation.
    """
    try:
        tree = ast.parse(expression, mode="eval")
        result = SafeEvaluator().visit(tree)
        return json.dumps({"expression": expression, "result": result}, indent=2)
    except CommandExecutionError:
        # Errors raised by the evaluator already carry a precise message;
        # let them through instead of re-wrapping them as "Calculation error".
        raise
    except SyntaxError as e:
        raise CommandExecutionError(f"Invalid expression syntax: {e}") from e
    except ZeroDivisionError as e:
        raise CommandExecutionError("Division by zero") from e
    except OverflowError as e:
        raise CommandExecutionError("Result too large") from e
    except Exception as e:
        # Boundary catch-all for anything else raised during evaluation.
        raise CommandExecutionError(f"Calculation error: {e}") from e
|
||||
|
||||
@command(
|
||||
["statistics", "stats_calc"],
|
||||
"Calculate statistics on a list of numbers.",
|
||||
{
|
||||
"numbers": JSONSchema(
|
||||
type=JSONSchema.Type.ARRAY,
|
||||
items=JSONSchema(type=JSONSchema.Type.NUMBER),
|
||||
description="List of numbers to analyze",
|
||||
required=True,
|
||||
),
|
||||
"operations": JSONSchema(
|
||||
type=JSONSchema.Type.ARRAY,
|
||||
items=JSONSchema(type=JSONSchema.Type.STRING),
|
||||
description="Statistics to compute: mean, median, mode, stdev, variance, min, max, sum, count (default: all)",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
def statistics_calc(
|
||||
self,
|
||||
numbers: list[float],
|
||||
operations: list[str] | None = None,
|
||||
) -> str:
|
||||
"""Calculate statistics on a list of numbers.
|
||||
|
||||
Args:
|
||||
numbers: List of numbers
|
||||
operations: Which statistics to compute
|
||||
|
||||
Returns:
|
||||
str: JSON with requested statistics
|
||||
"""
|
||||
if not numbers:
|
||||
raise CommandExecutionError("Empty list provided")
|
||||
|
||||
all_ops = [
|
||||
"mean",
|
||||
"median",
|
||||
"mode",
|
||||
"stdev",
|
||||
"variance",
|
||||
"min",
|
||||
"max",
|
||||
"sum",
|
||||
"count",
|
||||
]
|
||||
ops = operations if operations else all_ops
|
||||
|
||||
result = {}
|
||||
errors = []
|
||||
|
||||
for op in ops:
|
||||
try:
|
||||
if op == "mean":
|
||||
result["mean"] = statistics.mean(numbers)
|
||||
elif op == "median":
|
||||
result["median"] = statistics.median(numbers)
|
||||
elif op == "mode":
|
||||
try:
|
||||
result["mode"] = statistics.mode(numbers)
|
||||
except statistics.StatisticsError:
|
||||
result["mode"] = None
|
||||
errors.append("No unique mode found")
|
||||
elif op == "stdev":
|
||||
if len(numbers) > 1:
|
||||
result["stdev"] = statistics.stdev(numbers)
|
||||
else:
|
||||
result["stdev"] = 0
|
||||
elif op == "variance":
|
||||
if len(numbers) > 1:
|
||||
result["variance"] = statistics.variance(numbers)
|
||||
else:
|
||||
result["variance"] = 0
|
||||
elif op == "min":
|
||||
result["min"] = min(numbers)
|
||||
elif op == "max":
|
||||
result["max"] = max(numbers)
|
||||
elif op == "sum":
|
||||
result["sum"] = sum(numbers)
|
||||
elif op == "count":
|
||||
result["count"] = len(numbers)
|
||||
else:
|
||||
errors.append(f"Unknown operation: {op}")
|
||||
except Exception as e:
|
||||
errors.append(f"{op}: {e}")
|
||||
|
||||
output: dict[str, Any] = {"statistics": result}
|
||||
if errors:
|
||||
output["errors"] = errors
|
||||
|
||||
return json.dumps(output, indent=2)
|
||||
|
||||
@command(
|
||||
["convert_units", "unit_conversion"],
|
||||
"Convert between units of measurement.",
|
||||
{
|
||||
"value": JSONSchema(
|
||||
type=JSONSchema.Type.NUMBER,
|
||||
description="The value to convert",
|
||||
required=True,
|
||||
),
|
||||
"from_unit": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Source unit (e.g., 'km', 'miles', 'celsius', 'kg')",
|
||||
required=True,
|
||||
),
|
||||
"to_unit": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Target unit (e.g., 'm', 'feet', 'fahrenheit', 'lbs')",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def convert_units(
    self,
    value: float,
    from_unit: str,
    to_unit: str,
) -> str:
    """Convert between units of measurement.

    Supported categories are length, weight, volume, time, data and
    temperature. Unit names are matched case-insensitively after stripping
    surrounding whitespace, and both units must belong to the same category.

    Args:
        value: The value to convert
        from_unit: Source unit
        to_unit: Target unit

    Returns:
        str: JSON object with ``value``, the normalised ``from_unit`` and
        ``to_unit``, and the converted ``result``

    Raises:
        CommandExecutionError: If the units are unknown or belong to
            different categories
    """
    # Normalize unit names
    from_unit = from_unit.lower().strip()
    to_unit = to_unit.lower().strip()

    def tidy(number):
        # Results are reported with 6 decimal places. Below 1 that would
        # discard most (or all) significant digits - 1 byte in tb would
        # round to 0.0 - so small magnitudes keep 6 significant digits.
        if isinstance(number, float) and 0 < abs(number) < 1:
            return float(f"{number:.6g}")
        return round(number, 6)

    def report(result) -> str:
        return json.dumps(
            {
                "value": value,
                "from_unit": from_unit,
                "to_unit": to_unit,
                "result": tidy(result),
            },
            indent=2,
        )

    # Temperature needs offsets, not just factors: go through Celsius.
    temp_alias = {
        "c": "c", "celsius": "c",
        "f": "f", "fahrenheit": "f",
        "k": "k", "kelvin": "k",
    }
    if from_unit in temp_alias and to_unit in temp_alias:
        source, target = temp_alias[from_unit], temp_alias[to_unit]

        if source == "c":
            celsius = value
        elif source == "f":
            celsius = (value - 32) * 5 / 9
        else:  # kelvin
            celsius = value - 273.15

        if target == "c":
            return report(celsius)
        if target == "f":
            return report(celsius * 9 / 5 + 32)
        return report(celsius + 273.15)  # kelvin

    # Factor tables: each maps a unit name to its size in the base unit.
    # Length -> meters
    length_to_m = {
        "m": 1, "meter": 1, "meters": 1,
        "km": 1000, "kilometer": 1000, "kilometers": 1000,
        "cm": 0.01, "centimeter": 0.01, "centimeters": 0.01,
        "mm": 0.001, "millimeter": 0.001, "millimeters": 0.001,
        "mi": 1609.344, "mile": 1609.344, "miles": 1609.344,
        "yd": 0.9144, "yard": 0.9144, "yards": 0.9144,
        "ft": 0.3048, "foot": 0.3048, "feet": 0.3048,
        "in": 0.0254, "inch": 0.0254, "inches": 0.0254,
    }
    # Weight -> kilograms
    weight_to_kg = {
        "kg": 1, "kilogram": 1, "kilograms": 1,
        "g": 0.001, "gram": 0.001, "grams": 0.001,
        "mg": 0.000001, "milligram": 0.000001, "milligrams": 0.000001,
        "lb": 0.453592, "lbs": 0.453592, "pound": 0.453592, "pounds": 0.453592,
        "oz": 0.0283495, "ounce": 0.0283495, "ounces": 0.0283495,
    }
    # Volume -> liters
    volume_to_l = {
        "l": 1, "liter": 1, "liters": 1, "litre": 1, "litres": 1,
        "ml": 0.001, "milliliter": 0.001, "milliliters": 0.001,
        "gal": 3.78541, "gallon": 3.78541, "gallons": 3.78541,
        "qt": 0.946353, "quart": 0.946353, "quarts": 0.946353,
        "pt": 0.473176, "pint": 0.473176, "pints": 0.473176,
        "cup": 0.236588, "cups": 0.236588,
        "fl oz": 0.0295735, "floz": 0.0295735,
    }
    # Time -> seconds
    time_to_s = {
        "s": 1, "sec": 1, "second": 1, "seconds": 1,
        "min": 60, "minute": 60, "minutes": 60,
        "h": 3600, "hr": 3600, "hour": 3600, "hours": 3600,
        "d": 86400, "day": 86400, "days": 86400,
        "week": 604800, "weeks": 604800,
    }
    # Data -> bytes (binary multiples)
    data_to_bytes = {
        "b": 1, "byte": 1, "bytes": 1,
        "kb": 1024, "kilobyte": 1024, "kilobytes": 1024,
        "mb": 1024**2, "megabyte": 1024**2, "megabytes": 1024**2,
        "gb": 1024**3, "gigabyte": 1024**3, "gigabytes": 1024**3,
        "tb": 1024**4, "terabyte": 1024**4, "terabytes": 1024**4,
    }

    # Find the table that knows both units and convert through its base unit.
    for conv_table in (
        length_to_m,
        weight_to_kg,
        volume_to_l,
        time_to_s,
        data_to_bytes,
    ):
        if from_unit in conv_table and to_unit in conv_table:
            base_value = value * conv_table[from_unit]
            return report(base_value / conv_table[to_unit])

    raise CommandExecutionError(
        f"Cannot convert from '{from_unit}' to '{to_unit}'. "
        "Units must be in the same category (length, weight, volume, time, temperature, data)."
    )
|
||||
6
classic/forge/forge/components/text_utils/__init__.py
Normal file
6
classic/forge/forge/components/text_utils/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from forge.components.text_utils.text_utils import (
|
||||
TextUtilsComponent,
|
||||
TextUtilsConfiguration,
|
||||
)
|
||||
|
||||
__all__ = ["TextUtilsComponent", "TextUtilsConfiguration"]
|
||||
378
classic/forge/forge/components/text_utils/text_utils.py
Normal file
378
classic/forge/forge/components/text_utils/text_utils.py
Normal file
@@ -0,0 +1,378 @@
|
||||
import base64
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import urllib.parse
|
||||
from typing import Iterator, Literal, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from forge.agent.components import ConfigurableComponent
|
||||
from forge.agent.protocols import CommandProvider, DirectiveProvider
|
||||
from forge.command import Command, command
|
||||
from forge.models.json_schema import JSONSchema
|
||||
from forge.utils.exceptions import CommandExecutionError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TextUtilsConfiguration(BaseModel):
    # Limits applied by TextUtilsComponent.

    # Longer input text is rejected by regex_search.
    max_text_length: int = Field(
        description="Maximum text length to process",
        default=100000,
    )
    # regex_search stops collecting matches once this many were found.
    max_matches: int = Field(
        description="Maximum number of regex matches to return",
        default=1000,
    )
|
||||
|
||||
|
||||
class TextUtilsComponent(
|
||||
DirectiveProvider, CommandProvider, ConfigurableComponent[TextUtilsConfiguration]
|
||||
):
|
||||
"""Provides commands for text manipulation, regex operations, and encoding."""
|
||||
|
||||
config_class = TextUtilsConfiguration
|
||||
|
||||
def __init__(self, config: Optional[TextUtilsConfiguration] = None):
    """Initialise the component.

    Args:
        config: Optional configuration, passed unchanged to
            ``ConfigurableComponent.__init__``
    """
    ConfigurableComponent.__init__(self, config)
|
||||
|
||||
def get_resources(self) -> Iterator[str]:
    """Yield the resource description advertised by this component."""
    yield from ("Ability to manipulate text with regex and encoding operations.",)
|
||||
|
||||
def get_commands(self) -> Iterator[Command]:
    """Yield the text commands exposed by this component, in order."""
    for handler in (
        self.regex_search,
        self.regex_replace,
        self.encode_text,
        self.decode_text,
        self.format_template,
    ):
        yield handler
|
||||
|
||||
def _parse_flags(self, flags: str | None) -> int:
|
||||
"""Parse regex flag string into re flags.
|
||||
|
||||
Args:
|
||||
flags: String of flags (i, m, s, x)
|
||||
|
||||
Returns:
|
||||
int: Combined re flags
|
||||
"""
|
||||
if not flags:
|
||||
return 0
|
||||
|
||||
flag_map = {
|
||||
"i": re.IGNORECASE,
|
||||
"m": re.MULTILINE,
|
||||
"s": re.DOTALL,
|
||||
"x": re.VERBOSE,
|
||||
}
|
||||
|
||||
result = 0
|
||||
for char in flags.lower():
|
||||
if char in flag_map:
|
||||
result |= flag_map[char]
|
||||
|
||||
return result
|
||||
|
||||
@command(
    ["regex_search", "find_pattern"],
    "Search text for matches using a regular expression pattern.",
    {
        "text": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The text to search in",
            required=True,
        ),
        "pattern": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The regex pattern to search for",
            required=True,
        ),
        "flags": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Regex flags: i=ignore case, m=multiline, s=dotall, x=verbose",
            required=False,
        ),
        "return_groups": JSONSchema(
            type=JSONSchema.Type.BOOLEAN,
            description="Return capture groups instead of full matches (default: False)",
            required=False,
        ),
    },
)
def regex_search(
    self,
    text: str,
    pattern: str,
    flags: str | None = None,
    return_groups: bool = False,
) -> str:
    """Search text using a regex pattern and report the matches as JSON.

    Args:
        text: The text to search.
        pattern: The regex pattern.
        flags: Optional flag letters, parsed by ``_parse_flags``.
        return_groups: When true, matches of a pattern that has capture
            groups also carry a ``groups`` entry.

    Returns:
        str: JSON object with ``count`` and ``matches`` (each match has
        ``match``, optional ``groups``, ``start`` and ``end``). A
        ``truncated: true`` entry is added only when at least one further
        match existed beyond ``config.max_matches``.

    Raises:
        CommandExecutionError: If the text is longer than
            ``config.max_text_length`` or the pattern does not compile.
    """
    if len(text) > self.config.max_text_length:
        raise CommandExecutionError(
            f"Text exceeds maximum length of {self.config.max_text_length}"
        )

    try:
        regex = re.compile(pattern, self._parse_flags(flags))
    except re.error as e:
        raise CommandExecutionError(f"Invalid regex pattern: {e}") from e

    limit = self.config.max_matches
    matches = []
    truncated = False
    for match in regex.finditer(text):
        if len(matches) >= limit:
            # Reaching this point means another match exists past the limit,
            # so the output really is truncated (not merely "exactly full").
            truncated = True
            break

        entry = {"match": match.group(0)}
        if return_groups and match.groups():
            entry["groups"] = match.groups()
        entry["start"] = match.start()
        entry["end"] = match.end()
        matches.append(entry)

    result = {
        "count": len(matches),
        "matches": matches,
    }
    if truncated:
        result["truncated"] = True

    return json.dumps(result, indent=2)
|
||||
|
||||
@command(
    ["regex_replace", "replace_pattern"],
    "Replace text matching a regex pattern with a replacement string.",
    {
        "text": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The text to search and replace in",
            required=True,
        ),
        "pattern": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The regex pattern to match",
            required=True,
        ),
        "replacement": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The replacement string (can use \\1, \\2 for groups)",
            required=True,
        ),
        "flags": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Regex flags: i=ignore case, m=multiline, s=dotall, x=verbose",
            required=False,
        ),
        "count": JSONSchema(
            type=JSONSchema.Type.INTEGER,
            description="Maximum replacements (0 = all, default: 0)",
            required=False,
        ),
    },
)
def regex_replace(
    self,
    text: str,
    pattern: str,
    replacement: str,
    flags: str | None = None,
    count: int = 0,
) -> str:
    """Replace text matching a regex pattern and report the outcome as JSON.

    Args:
        text: The text to modify.
        pattern: The regex pattern.
        replacement: The replacement template (may reference groups).
        flags: Optional flag letters, parsed by ``_parse_flags``.
        count: Maximum number of replacements; ``0`` or a negative value
            means unlimited.

    Returns:
        str: JSON object with ``result`` (the modified text),
        ``replacements_made`` and the original ``pattern``.

    Raises:
        CommandExecutionError: If the text is longer than
            ``config.max_text_length``, the pattern does not compile, or the
            replacement template is invalid (e.g. refers to a missing group).
    """
    if len(text) > self.config.max_text_length:
        raise CommandExecutionError(
            f"Text exceeds maximum length of {self.config.max_text_length}"
        )

    try:
        regex = re.compile(pattern, self._parse_flags(flags))
    except re.error as e:
        raise CommandExecutionError(f"Invalid regex pattern: {e}") from e

    try:
        # subn performs the substitution and returns the exact number of
        # replacements in a single pass over the text.
        result, replacements_made = regex.subn(
            replacement, text, count=max(count, 0)
        )
    except (re.error, IndexError) as e:
        # A bad numeric group reference raises re.error; an unknown named
        # group (\g<name>) raises IndexError.
        raise CommandExecutionError(f"Invalid replacement string: {e}") from e

    return json.dumps(
        {
            "result": result,
            "replacements_made": replacements_made,
            "pattern": pattern,
        },
        indent=2,
    )
|
||||
|
||||
@command(
|
||||
["encode_text"],
|
||||
"Encode text using various encoding schemes.",
|
||||
{
|
||||
"text": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="The text to encode",
|
||||
required=True,
|
||||
),
|
||||
"encoding": JSONSchema(
|
||||
type=JSONSchema.Type.STRING,
|
||||
description="Encoding type: base64, url, html, hex",
|
||||
required=True,
|
||||
),
|
||||
},
|
||||
)
|
||||
def encode_text(
|
||||
self, text: str, encoding: Literal["base64", "url", "html", "hex"]
|
||||
) -> str:
|
||||
"""Encode text using specified encoding.
|
||||
|
||||
Args:
|
||||
text: The text to encode
|
||||
encoding: The encoding type
|
||||
|
||||
Returns:
|
||||
str: The encoded text
|
||||
"""
|
||||
if encoding == "base64":
|
||||
result = base64.b64encode(text.encode("utf-8")).decode("ascii")
|
||||
elif encoding == "url":
|
||||
result = urllib.parse.quote(text, safe="")
|
||||
elif encoding == "html":
|
||||
result = html.escape(text)
|
||||
elif encoding == "hex":
|
||||
result = text.encode("utf-8").hex()
|
||||
else:
|
||||
raise CommandExecutionError(
|
||||
f"Unknown encoding: {encoding}. Supported: base64, url, html, hex"
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{"original": text, "encoding": encoding, "result": result}, indent=2
|
||||
)
|
||||
|
||||
@command(
    ["decode_text"],
    "Decode text from various encoding schemes.",
    {
        "text": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The text to decode",
            required=True,
        ),
        "encoding": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Encoding type: base64, url, html, hex",
            required=True,
        ),
    },
)
def decode_text(
    self, text: str, encoding: Literal["base64", "url", "html", "hex"]
) -> str:
    """Decode text from the requested scheme and report it as JSON.

    Args:
        text: The text to decode.
        encoding: One of ``base64``, ``url``, ``html`` or ``hex``.

    Returns:
        str: JSON object with ``original``, ``encoding`` and ``result``.

    Raises:
        CommandExecutionError: If ``encoding`` is not a supported scheme, or
            if ``text`` cannot be decoded with it (malformed base64/hex, or
            decoded bytes that are not valid UTF-8).
    """
    # Reject unsupported schemes up front so this error is reported as-is
    # instead of being re-wrapped as a "Decoding failed" error below.
    if encoding not in ("base64", "url", "html", "hex"):
        raise CommandExecutionError(
            f"Unknown encoding: {encoding}. Supported: base64, url, html, hex"
        )

    try:
        if encoding == "base64":
            result = base64.b64decode(text).decode("utf-8")
        elif encoding == "url":
            result = urllib.parse.unquote(text)
        elif encoding == "html":
            result = html.unescape(text)
        else:  # "hex"
            result = bytes.fromhex(text).decode("utf-8")
    except (ValueError, TypeError) as e:
        # binascii.Error and UnicodeDecodeError are ValueError subclasses;
        # TypeError covers a non-string ``text``.
        raise CommandExecutionError(f"Decoding failed: {e}") from e

    return json.dumps(
        {"original": text, "encoding": encoding, "result": result}, indent=2
    )
|
||||
|
||||
@command(
    ["format_template", "template_substitute"],
    "Substitute variables in a template string using {variable} syntax.",
    {
        "template": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="Template with {variable} placeholders",
            required=True,
        ),
        "variables": JSONSchema(
            type=JSONSchema.Type.OBJECT,
            description="Dictionary of variable names to values",
            required=True,
        ),
    },
)
def format_template(self, template: str, variables: dict[str, str]) -> str:
    """Substitute ``{name}`` placeholders in a template in a single pass.

    Every placeholder is looked up against the *original* template, so a
    substituted value that itself contains ``{other}`` is inserted verbatim
    and never expanded again; the result does not depend on the order of
    ``variables``.

    Args:
        template: The template string with ``{placeholder}`` markers.
        variables: Mapping of placeholder names to values; values are
            converted with ``str()``.

    Returns:
        str: JSON object with ``result`` (the formatted string) and
        ``unfilled_placeholders`` (names of ``{word}`` placeholders in the
        template that have no entry in ``variables``, or ``null`` if none).

    Raises:
        CommandExecutionError: If ``variables`` is not a mapping with string
            keys or ``template`` is not a string.
    """
    try:
        substitutions = {
            "{" + key + "}": str(value) for key, value in variables.items()
        }

        if substitutions:
            # Longest token first so a token that is a prefix of another
            # cannot shadow it in the alternation.
            tokens = sorted(substitutions, key=len, reverse=True)
            token_pattern = re.compile("|".join(re.escape(tok) for tok in tokens))
            # A callable replacement keeps backslashes in values literal.
            result = token_pattern.sub(
                lambda found: substitutions[found.group(0)], template
            )
        else:
            result = template

        # Report only placeholders that were present in the template itself,
        # not brace text that arrived via substituted values.
        unfilled = [
            name
            for name in re.findall(r"\{(\w+)\}", template)
            if name not in variables
        ]
    except (TypeError, AttributeError) as e:
        raise CommandExecutionError(f"Template formatting failed: {e}") from e

    return json.dumps(
        {
            "result": result,
            "unfilled_placeholders": unfilled or None,
        },
        indent=2,
    )
|
||||
Reference in New Issue
Block a user