mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-04-30 03:00:41 -04:00
fix(backend/copilot): address iteration-2 review on #12879
- Add ``TodoWriteResponse`` and ``TaskResponse`` to ``ToolResponseUnion`` in ``chat/routes.py`` so the ``/api/chat/schema/tool-responses`` codegen endpoint surfaces both schemas to the frontend. Regenerated openapi.json + orval client accordingly. - Restructure the Task sub-agent error handling as nested ``try/except Exception`` inside a ``try/finally``. ``CancelledError`` / ``KeyboardInterrupt`` / ``SystemExit`` now propagate naturally through the outer finally — which both resets the depth ContextVar and rolls up inner usage — so the ``except BaseException`` + ``# noqa: BLE001`` suppressor pair is no longer needed. The existing regression tests (cancellation + exception) still cover it. - Prompt guidance (``SHARED_TOOL_NOTES``) now explicitly states that sub-agents inherit the parent tool set "except ``Task`` itself"; the "plan at most one level deep" bit saves the parent model from attempting nested delegation the baseline will refuse. - Replace the stale ``(budget of 32000 chars ≈ 8000 tokens)`` note in ``test_total_schema_char_budget`` with a reference to ``_CHAR_BUDGET`` above so future budget bumps don't desync the docstring again.
This commit is contained in:
@@ -75,6 +75,8 @@ from backend.copilot.tools.models import (
|
||||
NoResultsResponse,
|
||||
SetupRequirementsResponse,
|
||||
SuggestedGoalResponse,
|
||||
TaskResponse,
|
||||
TodoWriteResponse,
|
||||
UnderstandingUpdatedResponse,
|
||||
)
|
||||
from backend.copilot.tracking import track_user_message
|
||||
@@ -1419,6 +1421,8 @@ ToolResponseUnion = (
|
||||
| MemorySearchResponse
|
||||
| MemoryForgetCandidatesResponse
|
||||
| MemoryForgetConfirmResponse
|
||||
| TodoWriteResponse
|
||||
| TaskResponse
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -987,50 +987,47 @@ async def _run_task_subagent(
|
||||
)
|
||||
inner_llm = partial(_baseline_llm_caller, state=inner_state)
|
||||
|
||||
task_exc: BaseException | None = None
|
||||
task_exc: Exception | None = None
|
||||
token = _TASK_DEPTH_VAR.set(depth + 1)
|
||||
try:
|
||||
async for loop_result in tool_call_loop(
|
||||
messages=inner_messages,
|
||||
tools=inner_tools,
|
||||
llm_call=inner_llm,
|
||||
execute_tool=inner_executor,
|
||||
update_conversation=_inner_task_conversation_updater,
|
||||
max_iterations=_MAX_TASK_ITERATIONS,
|
||||
last_iteration_message=(
|
||||
"This is the final iteration — produce your summary now."
|
||||
),
|
||||
):
|
||||
# Discard inner streaming events so only the Task envelope and
|
||||
# its final summary reach the parent client. Token accounting
|
||||
# still happens via ``inner_state`` and rolls up after the
|
||||
# loop exits.
|
||||
inner_state.pending_events.clear()
|
||||
for tc in loop_result.last_tool_calls:
|
||||
tool_names_seen.append(tc.name)
|
||||
iterations = loop_result.iterations
|
||||
finished_naturally = loop_result.finished_naturally
|
||||
if loop_result.finished_naturally:
|
||||
final_response_text = loop_result.response_text or ""
|
||||
except BaseException as exc: # noqa: BLE001
|
||||
# BaseException (not Exception) so CancelledError / KeyboardInterrupt
|
||||
# / SystemExit still land in the ``finally`` and reset the
|
||||
# ContextVar — otherwise a cancelled Task would leak an elevated
|
||||
# depth into whatever task reuses this asyncio context next.
|
||||
# Swallow only ``Exception`` below; re-raise ``BaseException`` after
|
||||
# the reset.
|
||||
task_exc = exc
|
||||
try:
|
||||
async for loop_result in tool_call_loop(
|
||||
messages=inner_messages,
|
||||
tools=inner_tools,
|
||||
llm_call=inner_llm,
|
||||
execute_tool=inner_executor,
|
||||
update_conversation=_inner_task_conversation_updater,
|
||||
max_iterations=_MAX_TASK_ITERATIONS,
|
||||
last_iteration_message=(
|
||||
"This is the final iteration — produce your summary now."
|
||||
),
|
||||
):
|
||||
# Discard inner streaming events so only the Task envelope
|
||||
# and its final summary reach the parent client. Token
|
||||
# accounting still happens via ``inner_state`` and rolls up
|
||||
# after the loop exits.
|
||||
inner_state.pending_events.clear()
|
||||
for tc in loop_result.last_tool_calls:
|
||||
tool_names_seen.append(tc.name)
|
||||
iterations = loop_result.iterations
|
||||
finished_naturally = loop_result.finished_naturally
|
||||
if loop_result.finished_naturally:
|
||||
final_response_text = loop_result.response_text or ""
|
||||
except Exception as exc:
|
||||
task_exc = exc
|
||||
# ``CancelledError`` / ``KeyboardInterrupt`` / ``SystemExit``
|
||||
# derive from ``BaseException`` and are intentionally NOT caught
|
||||
# here — they propagate through the outer ``finally`` below, which
|
||||
# still resets the depth counter and rolls up usage before the
|
||||
# exception reaches the caller. Letting them bubble naturally
|
||||
# avoids the ``except BaseException`` suppressor pattern.
|
||||
finally:
|
||||
_TASK_DEPTH_VAR.reset(token)
|
||||
|
||||
# Usage rolls up regardless of outcome so partial work is still billed.
|
||||
_absorb_inner_usage(parent_state, inner_state)
|
||||
# Usage rolls up on every path (success, caught Exception, or
|
||||
# propagating BaseException) so partial work is still billed.
|
||||
_absorb_inner_usage(parent_state, inner_state)
|
||||
|
||||
if task_exc is not None:
|
||||
if not isinstance(task_exc, Exception):
|
||||
# KeyboardInterrupt / SystemExit / CancelledError propagate
|
||||
# after the depth counter was safely restored above.
|
||||
raise task_exc
|
||||
logger.error(
|
||||
"[Baseline] Task sub-agent failed: %s", task_exc, exc_info=task_exc
|
||||
)
|
||||
|
||||
@@ -161,12 +161,14 @@ progress. Guidelines:
|
||||
|
||||
### Sub-agents — `Task`
|
||||
The `Task` tool runs an **in-process sub-agent** inside the current
|
||||
conversation. The sub-agent has the same tool set but its own message
|
||||
history, so its intermediate tool calls stay out of the parent context —
|
||||
you only see the sub-agent's final summary as the tool result. Use it
|
||||
for self-contained work that would otherwise generate a lot of
|
||||
intermediate chatter (focused research, bounded refactors, multi-step
|
||||
exploration where only the conclusion matters).
|
||||
conversation. The sub-agent inherits the parent's tool set **except
|
||||
`Task` itself** (nested delegation is refused — plan at most one level
|
||||
deep), and it gets its own message history so its intermediate tool
|
||||
calls stay out of the parent context — you only see the sub-agent's
|
||||
final summary as the tool result. Use it for self-contained work that
|
||||
would otherwise generate a lot of intermediate chatter (focused
|
||||
research, bounded refactors, multi-step exploration where only the
|
||||
conclusion matters).
|
||||
|
||||
- Provide a short `description` (3-5 words, shown in the accordion) and a
|
||||
complete `prompt` — the sub-agent does NOT inherit the parent
|
||||
|
||||
@@ -120,9 +120,10 @@ def test_total_schema_char_budget() -> None:
|
||||
|
||||
This locks in the 34% token reduction from #12398 and prevents future
|
||||
description bloat from eroding the gains. Uses character count with a
|
||||
~4 chars/token heuristic (budget of 32000 chars ≈ 8000 tokens).
|
||||
Character count is tokenizer-agnostic — no dependency on GPT or Claude
|
||||
tokenizers — while still providing a stable regression gate.
|
||||
~4 chars/token heuristic; see ``_CHAR_BUDGET`` above for the current
|
||||
value and its change history. Character count is tokenizer-agnostic
|
||||
— no dependency on GPT or Claude tokenizers — while still providing a
|
||||
stable regression gate.
|
||||
"""
|
||||
schemas = [tool.as_openai_tool() for tool in TOOL_REGISTRY.values()]
|
||||
serialized = json.dumps(schemas)
|
||||
|
||||
@@ -2116,7 +2116,9 @@
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/MemoryForgetConfirmResponse"
|
||||
}
|
||||
},
|
||||
{ "$ref": "#/components/schemas/TodoWriteResponse" },
|
||||
{ "$ref": "#/components/schemas/TaskResponse" }
|
||||
],
|
||||
"title": "Response Getv2[Dummy] Tool Response Type Export For Codegen"
|
||||
}
|
||||
@@ -16068,6 +16070,55 @@
|
||||
"required": ["recent_searches", "providers", "top_blocks"],
|
||||
"title": "SuggestionsResponse"
|
||||
},
|
||||
"TaskResponse": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/ResponseType",
|
||||
"default": "task"
|
||||
},
|
||||
"message": { "type": "string", "title": "Message" },
|
||||
"session_id": {
|
||||
"anyOf": [{ "type": "string" }, { "type": "null" }],
|
||||
"title": "Session Id"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"title": "Description",
|
||||
"default": ""
|
||||
},
|
||||
"response": {
|
||||
"type": "string",
|
||||
"title": "Response",
|
||||
"description": "Final assistant text the sub-agent produced.",
|
||||
"default": ""
|
||||
},
|
||||
"iterations": {
|
||||
"type": "integer",
|
||||
"title": "Iterations",
|
||||
"default": 0
|
||||
},
|
||||
"tool_calls": {
|
||||
"items": { "type": "string" },
|
||||
"type": "array",
|
||||
"title": "Tool Calls",
|
||||
"description": "Names of tools the sub-agent invoked (for observability)."
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": ["completed", "max_iterations", "error"],
|
||||
"title": "Status",
|
||||
"default": "completed"
|
||||
},
|
||||
"error": {
|
||||
"anyOf": [{ "type": "string" }, { "type": "null" }],
|
||||
"title": "Error"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["message"],
|
||||
"title": "TaskResponse",
|
||||
"description": "Result of a delegated ``Task`` in-process sub-agent run.\n\nThe sub-agent runs a fresh tool-call loop with an isolated message\nhistory, then returns only its final assistant text. Intermediate tool\ncalls and thinking stay inside the sub-agent's loop so the parent\ncontext is not polluted."
|
||||
},
|
||||
"TimezoneResponse": {
|
||||
"properties": {
|
||||
"timezone": {
|
||||
@@ -16684,6 +16735,52 @@
|
||||
"required": ["timezone"],
|
||||
"title": "TimezoneResponse"
|
||||
},
|
||||
"TodoItem": {
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"title": "Content",
|
||||
"description": "Imperative description of the task."
|
||||
},
|
||||
"activeForm": {
|
||||
"type": "string",
|
||||
"title": "Activeform",
|
||||
"description": "Present-continuous form shown while the task is running."
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": ["pending", "in_progress", "completed"],
|
||||
"title": "Status",
|
||||
"default": "pending"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["content", "activeForm"],
|
||||
"title": "TodoItem",
|
||||
"description": "One entry in a ``TodoWrite`` checklist.\n\nMirrors the schema used by Claude Code's built-in ``TodoWrite`` tool so\nthe frontend's ``GenericTool`` accordion renders baseline-emitted todos\nidentically to SDK-emitted ones."
|
||||
},
|
||||
"TodoWriteResponse": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"$ref": "#/components/schemas/ResponseType",
|
||||
"default": "todo_write"
|
||||
},
|
||||
"message": { "type": "string", "title": "Message" },
|
||||
"session_id": {
|
||||
"anyOf": [{ "type": "string" }, { "type": "null" }],
|
||||
"title": "Session Id"
|
||||
},
|
||||
"todos": {
|
||||
"items": { "$ref": "#/components/schemas/TodoItem" },
|
||||
"type": "array",
|
||||
"title": "Todos"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["message"],
|
||||
"title": "TodoWriteResponse",
|
||||
"description": "Ack returned by ``TodoWrite``.\n\nThe tool is effectively stateless — the authoritative task list lives in\nthe assistant's latest tool-call arguments, which are replayed from the\ntranscript on each turn. The tool output only needs to confirm that the\nupdate was accepted so the model can proceed."
|
||||
},
|
||||
"TokenIntrospectionResult": {
|
||||
"properties": {
|
||||
"active": { "type": "boolean", "title": "Active" },
|
||||
|
||||
Reference in New Issue
Block a user