mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-08 03:00:28 -04:00
The `@@agptfile:` expansion system previously used content-sniffing (trying `json.loads` then `csv.Sniffer`) to decide whether to parse file content as structured data. This was fragile — a file containing just `"42"` would be parsed as an integer, and the heuristics could misfire on ambiguous content.

This PR replaces content-sniffing with **extension/MIME-based format detection**. When the file has a well-known extension (`.json`, `.csv`, etc.) or MIME type fragment (`workspace://id#application/json`), the content is parsed accordingly. Unknown formats or parse failures always fall back to plain string — no surprises.

> [!NOTE]
> This PR builds on the `@@agptfile:` file reference protocol introduced in #12332 and the structured data auto-parsing added in #12390.
>
> **What is `@@agptfile:`?**
> It is a special URI prefix (e.g. `@@agptfile:workspace:///report.csv`) that the CoPilot SDK expands inline before sending tool arguments to blocks. This lets the AI reference workspace files by name, and the SDK automatically reads and injects the file content. See #12332 for the full design.
### Changes 🏗️

**New utility: `backend/util/file_content_parser.py`**
- `infer_format(uri)` — determines format from file extension or MIME fragment
- `parse_file_content(content, fmt)` — parses content, never raises
- Supported text formats: JSON, JSONL/NDJSON, CSV, TSV, YAML, TOML
- Supported binary formats: Parquet (via pyarrow), Excel/XLSX (via openpyxl)
- JSON scalars (strings, numbers, booleans, null) stay as strings — only containers (arrays, objects) are promoted
- CSV/TSV require ≥1 row and ≥2 columns to qualify as tabular data
- Added `openpyxl` dependency for Excel reading via pandas
- Case-insensitive MIME fragment matching per RFC 2045
- Shared `PARSE_EXCEPTIONS` constant to avoid duplication between modules

**Updated `expand_file_refs_in_args` in `file_ref.py`**
- Bare refs now use `infer_format` + `parse_file_content` instead of the old `_try_parse_structured` content-sniffing function
- Binary formats (parquet, xlsx) read raw bytes via `read_file_bytes`
- Embedded refs (text around `@@agptfile:`) still produce plain strings
- **Size guards**: Workspace and sandbox file reads now enforce a 10 MB limit (matching the existing local file limit) to prevent OOM on large files

**Updated `blocks/github/commits.py`**
- Consolidated `_create_blob` and `_create_binary_blob` into a single function with an `encoding` parameter

**Updated copilot system prompt**
- Documents the extension-based structured data parsing and supported formats

**66 new tests** in `file_content_parser_test.py` covering:
- Format inference (extension, MIME, case-insensitive, precedence)
- All 8 format parsers (happy path + edge cases + fallbacks)
- Binary format handling (string input fallback, invalid bytes fallback)
- Unknown format passthrough

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] All 66 file_content_parser_test.py tests pass
  - [x] All 31 file_ref_test.py tests pass
  - [x] All 13 file_ref_integration_test.py tests pass
  - [x] `poetry run format` passes clean (including pyright)
163 lines
4.2 KiB
TOML
# Poetry package metadata for the platform backend.
[tool.poetry]
name = "autogpt-platform-backend"
version = "0.6.22"
description = "A platform for building AI-powered agentic workflows"
authors = ["AutoGPT <info@agpt.co>"]
readme = "README.md"
# The `backend` package is the only include, and it is restricted to the sdist format.
packages = [{ include = "backend", format = "sdist" }]

# Runtime dependencies of the backend.
# NOTE: keep entries in alphabetical order (after `python`) and insert new
# dependencies in their alphabetical location.
[tool.poetry.dependencies]
python = ">=3.10,<3.14"
aio-pika = "^9.5.5"
aioclamd = "^1.0.0"
aiodns = "^3.5.0"
aiofiles = "^25.1.0"
aiohttp = "^3.10.0"
anthropic = "^0.79.0"
apscheduler = "^3.11.1"
autogpt-libs = { path = "../autogpt_libs", develop = true }
bleach = { extras = ["css"], version = "^6.2.0" }
claude-agent-sdk = "0.1.45" # see copilot/sdk/sdk_compat_test.py for capability checks
click = "^8.2.0"
croniter = "^6.0.0"
cryptography = "^46.0"
discord-py = "^2.5.2"
e2b = "^2.15.2"
e2b-code-interpreter = "^2.0"
elevenlabs = "^1.50.0"
exa-py = "^1.14.20"
fastapi = "^0.128.6"
feedparser = "^6.0.11"
firecrawl-py = "^4.3.6"
flake8 = "^7.3.0"
fpdf2 = "^2.8.6"
gcloud-aio-storage = "^9.5.0"
google-api-python-client = "^2.177.0"
google-auth-oauthlib = "^1.2.2"
google-cloud-storage = "^3.2.0"
googlemaps = "^4.10.0"
gravitas-md2gdocs = "^0.1.0"
gravitasml = "^0.1.4"
groq = "^0.30.0"
html2text = "^2024.2.26"
jinja2 = "^3.1.6"
jsonref = "^1.1.0"
jsonschema = "^4.25.0"
langfuse = "^3.14.1"
langsmith = "^0.7.7"
launchdarkly-server-sdk = "^9.14.1"
mem0ai = "^0.1.115"
moviepy = "^2.1.2"
ollama = "^0.6.1"
openai = "^1.97.1"
openpyxl = "^3.1.5"
orjson = "^3.10.0"
pandas = "^2.3.1"
pika = "^1.3.2"
pinecone = "^7.3.0"
poetry = "2.1.1" # CHECK DEPENDABOT SUPPORT BEFORE UPGRADING
posthog = "^7.6.0"
postmarker = "^1.0"
praw = "~7.8.1"
prisma = "^0.15.0"
prometheus-client = "^0.24.1"
prometheus-fastapi-instrumentator = "^7.0.0"
psutil = "^7.0.0"
psycopg2-binary = "^2.9.10"
pyarrow = "^23.0.0"
pydantic = { extras = ["email"], version = "^2.12.5" }
pydantic-settings = "^2.12.0"
pytest = "^8.4.1"
pytest-asyncio = "^1.1.0"
pytest-snapshot = "^0.9.0"
python-dotenv = "^1.1.1"
python-multipart = "^0.0.22"
rank-bm25 = "^0.2.2"
redis = "^6.2.0"
regex = "^2025.9.18"
replicate = "^1.0.6"
sentry-sdk = { extras = ["anthropic", "fastapi", "launchdarkly", "openai", "sqlalchemy"], version = "^2.44.0" }
setuptools = "^80.9.0"
sqlalchemy = "^2.0.40"
stagehand = "^0.5.1"
strenum = "^0.4.9"
stripe = "^11.5.0"
supabase = "2.28.0"
tenacity = "^9.1.4"
tiktoken = "^0.12.0"
todoist-api-python = "^2.1.7"
tweepy = "^4.16.0"
uvicorn = { extras = ["standard"], version = "^0.40.0" }
websockets = "^15.0"
youtube-transcript-api = "^1.2.1"
yt-dlp = "2025.12.08"
zerobouncesdk = "^1.1.2"

# Dependencies of the `dev` dependency group.
# NOTE: please insert new dependencies in their alphabetical location
[tool.poetry.group.dev.dependencies]
aiohappyeyeballs = "^2.6.1"
black = "^24.10.0"
faker = "^38.2.0"
httpx = "^0.28.1"
isort = "^5.13.2"
poethepoet = "^0.41.0"
pre-commit = "^4.4.0"
pyright = "^1.1.407"
pytest-mock = "^3.15.1"
pytest-watcher = "^0.6.3"
requests = "^2.32.5"
ruff = "^0.15.0"

# PEP 517 build configuration: the project is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

# Console entry points; each name maps to a `module:callable` target.
[tool.poetry.scripts]
app = "backend.app:main"
rest = "backend.rest:main"
db = "backend.db:main"
ws = "backend.ws:main"
scheduler = "backend.scheduler:main"
notification = "backend.notification:main"
executor = "backend.exec:main"
analytics-setup = "generate_views:main_setup"
analytics-views = "generate_views:main_views"
copilot-executor = "backend.copilot.executor.__main__:main"
cli = "backend.cli:main"
format = "linter:format"
lint = "linter:lint"
test = "run_tests:test"
load-store-agents = "test.load_store_agents:run"
export-api-schema = "backend.cli.generate_openapi_json:main"
gen-prisma-stub = "gen_prisma_types_stub:main"
oauth-tool = "backend.cli.oauth_tool:cli"

# isort uses its "black" profile.
[tool.isort]
profile = "black"

# pytest-watcher settings: runs `pytest` with no extra arguments and matches `*.py` files.
[tool.pytest-watcher]
now = false
clear = true
delay = 0.2
runner = "pytest"
runner_args = []
patterns = ["*.py"]
ignore_patterns = []

# pytest configuration.
[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
# Disable syrupy plugin to avoid conflict with pytest-snapshot
# Both provide --snapshot-update argument causing ArgumentError
addopts = "-p no:syrupy"
# Silence DeprecationWarnings raised from discord.player and tweepy.api.
filterwarnings = [
    "ignore:'audioop' is deprecated:DeprecationWarning:discord.player",
    "ignore:invalid escape sequence:DeprecationWarning:tweepy.api",
]

# Ruff targets Python 3.10, the lower bound of the `python` constraint in
# [tool.poetry.dependencies].
[tool.ruff]
target-version = "py310"