Compare commits


3 Commits

Author SHA1 Message Date
Bentlybro
1ed748a356 refactor(backend): revert selective COPY, keep cleanup approach
Address review feedback: keep COPY --from=builder /app /app to avoid
maintenance burden of selective copies. The builder cleanup step still
removes __pycache__, test dirs, pip/poetry caches for size reduction.

Added clarifying comment about --only main referencing the development
docs (dev deps are installed locally, not in production images).
2026-01-31 19:56:21 +00:00
Bentlybro
9c28639c32 fix: address review feedback — keep setuptools, remove redundant mkdir, add comments
- Keep setuptools in cleanup (it's a direct dependency, used by aioclamd
  via pkg_resources at runtime)
- Remove redundant mkdir -p commands (COPY already creates dirs)
- Add clarifying comments for the autogpt_libs double-copy pattern
- Use || true instead of trailing ; true for cleaner error handling
2026-01-31 18:44:15 +00:00
Bentlybro
4f37a12743 docker: optimize backend image size — reduce ~862MB COPY layer
- Install only main dependencies (skip dev deps like pytest, black, ruff)
- Clean up build artifacts, caches, and unnecessary packages
- Replace wholesale COPY with selective copying of required files
- Add --no-cache-dir to pip install

This reduces the bloated 862MB layer from COPY --from=builder /app /app
by only copying what's actually needed at runtime: virtualenv, libs,
schema, and Prisma-generated types. All 7 backend services benefit.
2026-01-31 18:29:09 +00:00
19 changed files with 75 additions and 221 deletions
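The ~862MB figure in the commit messages above can be checked per layer with the stock docker CLI; a minimal sketch (image tag and -f path are placeholders, not from this repo):

# Hedged sketch: build the image, then list every layer with its size and
# the instruction that created it (tag and Dockerfile path are placeholders).
docker build -t backend-image:test -f Dockerfile .
# The layer created by `COPY --from=builder /app /app` should now sit far
# below the old ~862MB once the builder stage is cleaned.
docker history backend-image:test --format '{{.Size}}\t{{.CreatedBy}}'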

View File

@@ -37,13 +37,15 @@ ENV POETRY_VIRTUALENVS_CREATE=true
 ENV POETRY_VIRTUALENVS_IN_PROJECT=true
 ENV PATH=/opt/poetry/bin:$PATH
-RUN pip3 install poetry --break-system-packages
+RUN pip3 install --no-cache-dir poetry --break-system-packages

 # Copy and install dependencies
 COPY autogpt_platform/autogpt_libs /app/autogpt_platform/autogpt_libs
 COPY autogpt_platform/backend/poetry.lock autogpt_platform/backend/pyproject.toml /app/autogpt_platform/backend/
 WORKDIR /app/autogpt_platform/backend
-RUN poetry install --no-ansi --no-root
+# Production image only needs runtime deps; dev deps (pytest, black, ruff, etc.)
+# are installed locally via `poetry install --with dev` per the development docs
+RUN poetry install --no-ansi --no-root --only main

 # Generate Prisma client
 COPY autogpt_platform/backend/schema.prisma ./
@@ -51,6 +53,15 @@ COPY autogpt_platform/backend/backend/data/partial_types.py ./backend/data/parti
 COPY autogpt_platform/backend/gen_prisma_types_stub.py ./
 RUN poetry run prisma generate && poetry run gen-prisma-stub

+# Clean up build artifacts and caches to reduce layer size
+# Note: setuptools is kept as it's a direct dependency (used by aioclamd via pkg_resources)
+RUN find /app -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true; \
+    find /app -type d -name tests -exec rm -rf {} + 2>/dev/null || true; \
+    find /app -type d -name test -exec rm -rf {} + 2>/dev/null || true; \
+    rm -rf /app/autogpt_platform/backend/.venv/lib/python*/site-packages/pip* \
+        /root/.cache/pip \
+        /root/.cache/pypoetry
+
 FROM debian:13-slim AS server_dependencies

 WORKDIR /app
@@ -68,7 +79,7 @@ RUN apt-get update && apt-get install -y \
     python3-pip \
     && rm -rf /var/lib/apt/lists/*

-# Copy only necessary files from builder
+# Copy built artifacts from builder (cleaned of caches, __pycache__, and test dirs)
 COPY --from=builder /app /app
 COPY --from=builder /usr/local/lib/python3* /usr/local/lib/python3*
 COPY --from=builder /usr/local/bin/poetry /usr/local/bin/poetry
@@ -81,9 +92,7 @@ COPY --from=builder /root/.cache/prisma-python/binaries /root/.cache/prisma-pyth
 ENV PATH="/app/autogpt_platform/backend/.venv/bin:$PATH"

-RUN mkdir -p /app/autogpt_platform/autogpt_libs
-RUN mkdir -p /app/autogpt_platform/backend
+# Copy fresh source from context (overwrites builder's copy with latest source)
 COPY autogpt_platform/autogpt_libs /app/autogpt_platform/autogpt_libs
 COPY autogpt_platform/backend/poetry.lock autogpt_platform/backend/pyproject.toml /app/autogpt_platform/backend/
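The second commit's "`|| true` instead of trailing `; true`" point is subtle enough to warrant a plain-shell illustration (a sketch, not code from the repo):

set -e   # strict mode, as a CI shell or `sh -e` entrypoint would apply
# `|| true` neutralizes the failure of this exact command in place, so the
# script keeps going even under `set -e`:
find /app -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# A trailing `; true` only makes the list's final exit status succeed, which
# hides the failure under Docker's default `sh -c`; under `set -e` the shell
# aborts at the failed find before `true` ever runs:
find /missing -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; true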

View File

@@ -14,7 +14,6 @@ from backend.data.graph import (
     create_graph,
     get_graph,
     get_graph_all_versions,
-    get_store_listed_graphs,
 )
 from backend.util.exceptions import DatabaseError, NotFoundError
@@ -267,18 +266,18 @@ async def get_library_agents_for_generation(
 async def search_marketplace_agents_for_generation(
     search_query: str,
     max_results: int = 10,
-) -> list[LibraryAgentSummary]:
+) -> list[MarketplaceAgentSummary]:
     """Search marketplace agents formatted for Agent Generator.

-    Fetches marketplace agents and their full schemas so they can be used
-    as sub-agents in generated workflows.
+    Note: This returns basic agent info. Full input/output schemas would require
+    additional graph fetches and is a potential future enhancement.

     Args:
         search_query: Search term to find relevant public agents
         max_results: Maximum number of agents to return (default 10)

     Returns:
-        List of LibraryAgentSummary with full input/output schemas
+        List of MarketplaceAgentSummary (without detailed schemas for now)
     """
     try:
         response = await store_db.get_store_agents(
@@ -287,31 +286,17 @@ async def search_marketplace_agents_for_generation(
             page_size=max_results,
         )
-        agents_with_graphs = [
-            agent for agent in response.agents if agent.agent_graph_id
-        ]
-        if not agents_with_graphs:
-            return []
-        graph_ids = [agent.agent_graph_id for agent in agents_with_graphs]
-        graphs = await get_store_listed_graphs(*graph_ids)
-        results: list[LibraryAgentSummary] = []
-        for agent in agents_with_graphs:
-            graph_id = agent.agent_graph_id
-            if graph_id and graph_id in graphs:
-                graph = graphs[graph_id]
-                results.append(
-                    LibraryAgentSummary(
-                        graph_id=graph.id,
-                        graph_version=graph.version,
-                        name=agent.agent_name,
-                        description=agent.description,
-                        input_schema=graph.input_schema,
-                        output_schema=graph.output_schema,
-                    )
-                )
+        results: list[MarketplaceAgentSummary] = []
+        for agent in response.agents:
+            results.append(
+                MarketplaceAgentSummary(
+                    name=agent.agent_name,
+                    description=agent.description,
+                    sub_heading=agent.sub_heading,
+                    creator=agent.creator,
+                    is_marketplace_agent=True,
+                )
+            )
         return results
     except Exception as e:
         logger.warning(f"Failed to search marketplace agents: {e}")
@@ -342,7 +327,8 @@ async def get_all_relevant_agents_for_generation(
         max_marketplace_results: Max marketplace agents to return (default 10)

     Returns:
-        List of AgentSummary with full schemas (both library and marketplace agents)
+        List of AgentSummary, library agents first (with full schemas),
+        then marketplace agents (basic info only)
     """
     agents: list[AgentSummary] = []
     seen_graph_ids: set[str] = set()
@@ -379,11 +365,16 @@ async def get_all_relevant_agents_for_generation(
             search_query=search_query,
             max_results=max_marketplace_results,
         )
+        library_names: set[str] = set()
+        for a in agents:
+            name = a.get("name")
+            if name and isinstance(name, str):
+                library_names.add(name.lower())
         for agent in marketplace_agents:
-            graph_id = agent.get("graph_id")
-            if graph_id and graph_id not in seen_graph_ids:
-                agents.append(agent)
-                seen_graph_ids.add(graph_id)
+            agent_name = agent.get("name")
+            if agent_name and isinstance(agent_name, str):
+                if agent_name.lower() not in library_names:
+                    agents.append(agent)
     return agents
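Read in isolation, the new merge logic above amounts to: keep every library agent, then append marketplace agents whose names do not collide case-insensitively. A self-contained sketch; merge_agents and the simplified dict shapes are illustrative, not the repo's actual types:

# Hedged sketch of the name-based dedup above (simplified dict shapes).
def merge_agents(library: list[dict], marketplace: list[dict]) -> list[dict]:
    merged = list(library)  # library agents first, keeping their full schemas
    # Case-insensitive index of library names; skip missing/non-string values
    library_names = {
        a["name"].lower() for a in library if isinstance(a.get("name"), str)
    }
    for agent in marketplace:
        name = agent.get("name")
        # Marketplace summaries no longer carry graph_id, so dedup keys on name
        if isinstance(name, str) and name.lower() not in library_names:
            merged.append(agent)
    return merged

# "Shared Agent" from the marketplace is dropped; "Unique Agent" survives.
library = [{"name": "Shared Agent", "graph_id": "lib-123"}]
marketplace = [{"name": "Shared Agent"}, {"name": "Unique Agent"}]
assert [a["name"] for a in merge_agents(library, marketplace)] == [
    "Shared Agent",
    "Unique Agent",
]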

View File

@@ -112,7 +112,6 @@ async def get_store_agents(
                 description=agent["description"],
                 runs=agent["runs"],
                 rating=agent["rating"],
-                agent_graph_id=agent.get("agentGraphId", ""),
             )
             store_agents.append(store_agent)
         except Exception as e:
@@ -171,7 +170,6 @@ async def get_store_agents(
                 description=agent.description,
                 runs=agent.runs,
                 rating=agent.rating,
-                agent_graph_id=agent.agentGraphId,
             )
             # Add to the list only if creation was successful
             store_agents.append(store_agent)

View File

@@ -600,7 +600,6 @@ async def hybrid_search(
             sa.featured,
             sa.is_available,
             sa.updated_at,
-            sa."agentGraphId",
             -- Searchable text for BM25 reranking
             COALESCE(sa.agent_name, '') || ' ' || COALESCE(sa.sub_heading, '') || ' ' || COALESCE(sa.description, '') as searchable_text,
             -- Semantic score
@@ -660,7 +659,6 @@ async def hybrid_search(
             featured,
             is_available,
             updated_at,
-            "agentGraphId",
             searchable_text,
             semantic_score,
             lexical_score,

View File

@@ -38,7 +38,6 @@ class StoreAgent(pydantic.BaseModel):
     description: str
     runs: int
    rating: float
-    agent_graph_id: str

 class StoreAgentsResponse(pydantic.BaseModel):

View File

@@ -26,13 +26,11 @@ def test_store_agent():
         description="Test description",
         runs=50,
         rating=4.5,
-        agent_graph_id="test-graph-id",
     )
     assert agent.slug == "test-agent"
     assert agent.agent_name == "Test Agent"
     assert agent.runs == 50
     assert agent.rating == 4.5
-    assert agent.agent_graph_id == "test-graph-id"

 def test_store_agents_response():
@@ -48,7 +46,6 @@ def test_store_agents_response():
                 description="Test description",
                 runs=50,
                 rating=4.5,
-                agent_graph_id="test-graph-id",
             )
         ],
         pagination=store_model.Pagination(

View File

@@ -82,7 +82,6 @@ def test_get_agents_featured(
                 description="Featured agent description",
                 runs=100,
                 rating=4.5,
-                agent_graph_id="test-graph-1",
             )
         ],
         pagination=store_model.Pagination(
@@ -128,7 +127,6 @@ def test_get_agents_by_creator(
                 description="Creator agent description",
                 runs=50,
                 rating=4.0,
-                agent_graph_id="test-graph-2",
             )
         ],
         pagination=store_model.Pagination(
@@ -174,7 +172,6 @@ def test_get_agents_sorted(
                 description="Top agent description",
                 runs=1000,
                 rating=5.0,
-                agent_graph_id="test-graph-3",
             )
         ],
         pagination=store_model.Pagination(
@@ -220,7 +217,6 @@ def test_get_agents_search(
                 description="Specific search term description",
                 runs=75,
                 rating=4.2,
-                agent_graph_id="test-graph-search",
             )
         ],
         pagination=store_model.Pagination(
@@ -266,7 +262,6 @@ def test_get_agents_category(
                 description="Category agent description",
                 runs=60,
                 rating=4.1,
-                agent_graph_id="test-graph-category",
             )
         ],
         pagination=store_model.Pagination(
@@ -311,7 +306,6 @@ def test_get_agents_pagination(
                 description=f"Agent {i} description",
                 runs=i * 10,
                 rating=4.0,
-                agent_graph_id="test-graph-2",
             )
             for i in range(5)
         ],

View File

@@ -33,7 +33,6 @@ class TestCacheDeletion:
                 description="Test description",
                 runs=100,
                 rating=4.5,
-                agent_graph_id="test-graph-id",
             )
         ],
         pagination=Pagination(

View File

@@ -1028,39 +1028,6 @@ async def get_graph(
     return GraphModel.from_db(graph, for_export)

-async def get_store_listed_graphs(*graph_ids: str) -> dict[str, GraphModel]:
-    """Batch-fetch multiple store-listed graphs by their IDs.
-
-    Only returns graphs that have approved store listings (publicly available).
-    Does not require permission checks since store-listed graphs are public.
-
-    Args:
-        *graph_ids: Variable number of graph IDs to fetch
-
-    Returns:
-        Dict mapping graph_id to GraphModel for graphs with approved store listings
-    """
-    if not graph_ids:
-        return {}
-    store_listings = await StoreListingVersion.prisma().find_many(
-        where={
-            "agentGraphId": {"in": list(graph_ids)},
-            "submissionStatus": SubmissionStatus.APPROVED,
-            "isDeleted": False,
-        },
-        include={"AgentGraph": {"include": AGENT_GRAPH_INCLUDE}},
-        distinct=["agentGraphId"],
-        order={"agentGraphVersion": "desc"},
-    )
-    return {
-        listing.agentGraphId: GraphModel.from_db(listing.AgentGraph)
-        for listing in store_listings
-        if listing.AgentGraph
-    }

 async def get_graph_as_admin(
     graph_id: str,
     version: int | None = None,

View File

@@ -193,11 +193,9 @@ async def _handle_graph_validation_error(args: "GraphExecutionJobArgs") -> None:
             user_id=args.user_id,
         )
     else:
-        logger.warning(
-            f"Old scheduled job for graph {args.graph_id} (user {args.user_id}) "
-            f"has no schedule_id, attempting targeted cleanup"
+        logger.error(
+            f"Unable to unschedule graph: {args.graph_id} as this is an old job with no associated schedule_id please remove manually"
         )
-        await _cleanup_old_schedules_without_id(args.graph_id, args.user_id)

 async def _handle_graph_not_available(
@@ -240,35 +238,6 @@ async def _cleanup_orphaned_schedules_for_graph(graph_id: str, user_id: str) -> None:
     )

-async def _cleanup_old_schedules_without_id(graph_id: str, user_id: str) -> None:
-    """Remove only schedules that have no schedule_id in their job args.
-
-    Unlike _cleanup_orphaned_schedules_for_graph (which removes ALL schedules
-    for a graph), this only targets legacy jobs created before schedule_id was
-    added to GraphExecutionJobArgs, preserving any valid newer schedules.
-    """
-    scheduler_client = get_scheduler_client()
-    schedules = await scheduler_client.get_execution_schedules(
-        graph_id=graph_id, user_id=user_id
-    )
-    for schedule in schedules:
-        if schedule.schedule_id is not None:
-            continue
-        try:
-            await scheduler_client.delete_schedule(
-                schedule_id=schedule.id, user_id=user_id
-            )
-            logger.info(
-                f"Cleaned up old schedule {schedule.id} (no schedule_id) "
-                f"for graph {graph_id}"
-            )
-        except Exception:
-            logger.exception(
-                f"Failed to delete old schedule {schedule.id} for graph {graph_id}"
-            )

 def cleanup_expired_files():
     """Clean up expired files from cloud storage."""
     # Wait for completion

View File

@@ -1,39 +0,0 @@
-from urllib.parse import urlparse
-
-import fastapi
-from fastapi.routing import APIRoute
-
-from backend.api.features.integrations.router import router as integrations_router
-from backend.integrations.providers import ProviderName
-from backend.integrations.webhooks import utils as webhooks_utils
-
-def test_webhook_ingress_url_matches_route(monkeypatch) -> None:
-    app = fastapi.FastAPI()
-    app.include_router(integrations_router, prefix="/api/integrations")
-    provider = ProviderName.GITHUB
-    webhook_id = "webhook_123"
-    base_url = "https://example.com"
-    monkeypatch.setattr(webhooks_utils.app_config, "platform_base_url", base_url)
-    route = next(
-        route
-        for route in integrations_router.routes
-        if isinstance(route, APIRoute)
-        and route.path == "/{provider}/webhooks/{webhook_id}/ingress"
-        and "POST" in route.methods
-    )
-    expected_path = f"/api/integrations{route.path}".format(
-        provider=provider.value,
-        webhook_id=webhook_id,
-    )
-    actual_url = urlparse(webhooks_utils.webhook_ingress_url(provider, webhook_id))
-    expected_base = urlparse(base_url)
-    assert (actual_url.scheme, actual_url.netloc) == (
-        expected_base.scheme,
-        expected_base.netloc,
-    )
-    assert actual_url.path == expected_path

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Creator agent subheading",
       "description": "Creator agent description",
       "runs": 50,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     }
   ],
   "pagination": {

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Category agent subheading",
       "description": "Category agent description",
       "runs": 60,
-      "rating": 4.1,
-      "agent_graph_id": "test-graph-category"
+      "rating": 4.1
     }
   ],
   "pagination": {

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Agent 0 subheading",
       "description": "Agent 0 description",
       "runs": 0,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     },
     {
       "slug": "agent-1",
@@ -21,8 +20,7 @@
       "sub_heading": "Agent 1 subheading",
       "description": "Agent 1 description",
       "runs": 10,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     },
     {
       "slug": "agent-2",
@@ -33,8 +31,7 @@
       "sub_heading": "Agent 2 subheading",
       "description": "Agent 2 description",
       "runs": 20,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     },
     {
       "slug": "agent-3",
@@ -45,8 +42,7 @@
       "sub_heading": "Agent 3 subheading",
       "description": "Agent 3 description",
       "runs": 30,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     },
     {
       "slug": "agent-4",
@@ -57,8 +53,7 @@
       "sub_heading": "Agent 4 subheading",
       "description": "Agent 4 description",
       "runs": 40,
-      "rating": 4.0,
-      "agent_graph_id": "test-graph-2"
+      "rating": 4.0
     }
   ],
   "pagination": {

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Search agent subheading",
       "description": "Specific search term description",
       "runs": 75,
-      "rating": 4.2,
-      "agent_graph_id": "test-graph-search"
+      "rating": 4.2
     }
   ],
   "pagination": {

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Top agent subheading",
       "description": "Top agent description",
       "runs": 1000,
-      "rating": 5.0,
-      "agent_graph_id": "test-graph-3"
+      "rating": 5.0
     }
   ],
   "pagination": {

View File

@@ -9,8 +9,7 @@
       "sub_heading": "Featured agent subheading",
       "description": "Featured agent description",
       "runs": 100,
-      "rating": 4.5,
-      "agent_graph_id": "test-graph-1"
+      "rating": 4.5
     }
   ],
   "pagination": {

View File

@@ -134,28 +134,15 @@ class TestSearchMarketplaceAgentsForGeneration:
                 description="A public agent",
                 sub_heading="Does something useful",
                 creator="creator-1",
-                agent_graph_id="graph-123",
             )
         ]
-        mock_graph = MagicMock()
-        mock_graph.id = "graph-123"
-        mock_graph.version = 1
-        mock_graph.input_schema = {"type": "object"}
-        mock_graph.output_schema = {"type": "object"}
-        with (
-            patch(
-                "backend.api.features.store.db.get_store_agents",
-                new_callable=AsyncMock,
-                return_value=mock_response,
-            ) as mock_search,
-            patch(
-                "backend.api.features.chat.tools.agent_generator.core.get_store_listed_graphs",
-                new_callable=AsyncMock,
-                return_value={"graph-123": mock_graph},
-            ),
-        ):
+        # The store_db is dynamically imported, so patch the import path
+        with patch(
+            "backend.api.features.store.db.get_store_agents",
+            new_callable=AsyncMock,
+            return_value=mock_response,
+        ) as mock_search:
             result = await core.search_marketplace_agents_for_generation(
                 search_query="automation",
                 max_results=10,
@@ -169,7 +156,7 @@ class TestSearchMarketplaceAgentsForGeneration:
         assert len(result) == 1
         assert result[0]["name"] == "Public Agent"
-        assert result[0]["graph_id"] == "graph-123"
+        assert result[0]["is_marketplace_agent"] is True

     @pytest.mark.asyncio
     async def test_handles_marketplace_error_gracefully(self):
@@ -206,12 +193,11 @@ class TestGetAllRelevantAgentsForGeneration:
         marketplace_agents = [
             {
                 "graph_id": "market-456",
-                "graph_version": 1,
                 "name": "Market Agent",
                 "description": "From marketplace",
-                "input_schema": {},
-                "output_schema": {},
+                "sub_heading": "Sub heading",
+                "creator": "creator-1",
+                "is_marketplace_agent": True,
             }
         ]
@@ -239,11 +225,11 @@ class TestGetAllRelevantAgentsForGeneration:
         assert result[1]["name"] == "Market Agent"

     @pytest.mark.asyncio
-    async def test_deduplicates_by_graph_id(self):
-        """Test that marketplace agents with same graph_id as library are excluded."""
+    async def test_deduplicates_by_name(self):
+        """Test that marketplace agents with same name as library are excluded."""
         library_agents = [
             {
-                "graph_id": "shared-123",
+                "graph_id": "lib-123",
                 "graph_version": 1,
                 "name": "Shared Agent",
                 "description": "From library",
@@ -254,20 +240,18 @@ class TestGetAllRelevantAgentsForGeneration:
         marketplace_agents = [
             {
-                "graph_id": "shared-123",  # Same graph_id, should be deduplicated
-                "graph_version": 1,
-                "name": "Shared Agent",
+                "name": "Shared Agent",  # Same name, should be deduplicated
                 "description": "From marketplace",
-                "input_schema": {},
-                "output_schema": {},
+                "sub_heading": "Sub heading",
+                "creator": "creator-1",
+                "is_marketplace_agent": True,
             },
             {
-                "graph_id": "unique-456",
-                "graph_version": 1,
                 "name": "Unique Agent",
                 "description": "Only in marketplace",
-                "input_schema": {},
-                "output_schema": {},
+                "sub_heading": "Sub heading",
+                "creator": "creator-2",
+                "is_marketplace_agent": True,
             },
         ]
@@ -289,7 +273,7 @@ class TestGetAllRelevantAgentsForGeneration:
             include_marketplace=True,
         )
-        # Shared Agent from marketplace should be excluded by graph_id
+        # Shared Agent from marketplace should be excluded
         assert len(result) == 2
         names = [a["name"] for a in result]
         assert "Shared Agent" in names

View File

@@ -9833,8 +9833,7 @@
         "sub_heading": { "type": "string", "title": "Sub Heading" },
         "description": { "type": "string", "title": "Description" },
         "runs": { "type": "integer", "title": "Runs" },
-        "rating": { "type": "number", "title": "Rating" },
-        "agent_graph_id": { "type": "string", "title": "Agent Graph Id" }
+        "rating": { "type": "number", "title": "Rating" }
       },
       "type": "object",
       "required": [
@@ -9846,8 +9845,7 @@
         "sub_heading",
         "description",
         "runs",
-        "rating",
-        "agent_graph_id"
+        "rating"
       ],
       "title": "StoreAgent"
     },