fix(copilot): address PR review - reduce prompt redundancy, tighten tests

- Slim down DEFAULT_SYSTEM_PROMPT to a brief one-liner referencing the
  supplement for detailed workflow (avoids ~300 token duplication)
- Tighten test assertions to use specific substring checks (e.g. section
  headers, exact phrases) instead of loose single-word matches
- Restore view_agent_output reference in the agent building guide for
  node-by-node execution trace inspection
- Add test for view_agent_output mention in guide (22 tests total)
This commit is contained in:
Zamil Majdy
2026-03-26 20:35:19 +07:00
parent ac3a826ad0
commit c564ac7277
3 changed files with 44 additions and 60 deletions

View File

@@ -16,31 +16,24 @@ from backend.copilot.tools import TOOL_REGISTRY
class TestSystemPromptDryRunLoop:
"""Verify the system prompt includes dry-run loop instructions."""
"""Verify the system prompt includes a brief dry-run loop reference.
The detailed workflow lives in the supplement (_SHARED_TOOL_NOTES);
the system prompt only carries a short pointer to keep it minimal.
"""
def test_system_prompt_mentions_dry_run(self):
assert (
"dry-run" in DEFAULT_SYSTEM_PROMPT.lower()
or "dry_run" in DEFAULT_SYSTEM_PROMPT
)
def test_system_prompt_mentions_create_edit_loop(self):
prompt_lower = DEFAULT_SYSTEM_PROMPT.lower()
assert "create" in prompt_lower
assert "edit_agent" in DEFAULT_SYSTEM_PROMPT or "edit" in prompt_lower
assert "loop" in prompt_lower or "repeat" in prompt_lower
def test_system_prompt_mentions_max_iterations(self):
assert "3" in DEFAULT_SYSTEM_PROMPT
assert "iteration" in DEFAULT_SYSTEM_PROMPT.lower()
def test_system_prompt_mentions_inspect_output(self):
prompt_lower = DEFAULT_SYSTEM_PROMPT.lower()
assert "inspect" in prompt_lower or "check" in prompt_lower
assert "dry-run" in DEFAULT_SYSTEM_PROMPT.lower()
def test_system_prompt_mentions_never_skip(self):
assert "NEVER skip" in DEFAULT_SYSTEM_PROMPT
def test_system_prompt_references_tool_notes(self):
assert "tool notes" in DEFAULT_SYSTEM_PROMPT.lower()
def test_system_prompt_mentions_iterations(self):
assert "3 iteration" in DEFAULT_SYSTEM_PROMPT.lower()
class TestToolDescriptionsDryRunLoop:
"""Verify tool descriptions guide the LLM through the dry-run loop."""
@@ -55,13 +48,15 @@ class TestToolDescriptionsDryRunLoop:
tool = TOOL_REGISTRY["edit_agent"]
desc = tool.description
assert "dry_run" in desc or "dry-run" in desc.lower()
assert "fix" in desc.lower() or "issues" in desc.lower()
assert "dry-run testing" in desc.lower() or "wiring errors" in desc.lower()
def test_run_agent_mentions_dry_run_for_testing(self):
tool = TOOL_REGISTRY["run_agent"]
desc = tool.description
assert "dry_run" in desc or "dry-run" in desc.lower()
assert "test" in desc.lower() or "verify" in desc.lower()
assert "dry_run=True" in desc
assert (
"test agent wiring" in desc.lower() or "simulates execution" in desc.lower()
)
def test_run_agent_dry_run_param_mentions_workflow(self):
tool = TOOL_REGISTRY["run_agent"]
@@ -71,12 +66,12 @@ class TestToolDescriptionsDryRunLoop:
dry_run_desc = params["properties"]["dry_run"]["description"]
assert "create_agent" in dry_run_desc or "edit_agent" in dry_run_desc
assert "wait_for_result" in dry_run_desc
assert "3" in dry_run_desc # max iterations
assert "3 iterations" in dry_run_desc or "max " in dry_run_desc
def test_get_agent_building_guide_mentions_workflow(self):
tool = TOOL_REGISTRY["get_agent_building_guide"]
desc = tool.description
assert "dry-run" in desc.lower() or "dry_run" in desc
assert "dry-run" in desc.lower()
def test_run_agent_dry_run_param_exists(self):
tool = TOOL_REGISTRY["run_agent"]
@@ -90,26 +85,24 @@ class TestToolDescriptionsDryRunLoop:
class TestPromptingSupplementDryRunLoop:
"""Verify the prompting supplement includes the iterative workflow."""
def test_shared_tool_notes_include_dry_run_section(self):
assert (
"dry-run" in _SHARED_TOOL_NOTES.lower() or "dry_run" in _SHARED_TOOL_NOTES
)
def test_shared_tool_notes_include_dry_run_section_header(self):
assert "Iterative agent development" in _SHARED_TOOL_NOTES
def test_shared_tool_notes_include_loop_workflow(self):
notes_lower = _SHARED_TOOL_NOTES.lower()
assert "create" in notes_lower
assert "fix" in notes_lower
assert "iteration" in notes_lower or "repeat" in notes_lower
def test_shared_tool_notes_include_create_dry_run_fix_workflow(self):
assert "create -> dry-run -> fix" in _SHARED_TOOL_NOTES.lower()
def test_shared_tool_notes_include_error_patterns(self):
notes_lower = _SHARED_TOOL_NOTES.lower()
assert "error" in notes_lower
assert "null" in notes_lower or "empty" in notes_lower
assert "errors / failed nodes" in notes_lower
assert "null / empty outputs" in notes_lower
assert "nodes that never executed" in notes_lower
def test_shared_tool_notes_include_max_iterations(self):
assert "3 times" in _SHARED_TOOL_NOTES or "3 iterations" in _SHARED_TOOL_NOTES
def test_sdk_supplement_includes_dry_run_section(self):
supplement = get_sdk_supplement(use_e2b=False, cwd="/tmp/test")
supplement_lower = supplement.lower()
assert "dry-run" in supplement_lower or "dry_run" in supplement_lower
assert "Iterative agent development" in supplement
class TestAgentBuildingGuideDryRunLoop:
@@ -121,21 +114,23 @@ class TestAgentBuildingGuideDryRunLoop:
return guide_path.read_text(encoding="utf-8")
def test_guide_has_dry_run_verification_section(self, guide_content):
assert "Dry-Run Verification Loop" in guide_content
assert "REQUIRED: Dry-Run Verification Loop" in guide_content
def test_guide_workflow_includes_dry_run_step(self, guide_content):
# Check the workflow section mentions dry-run as a step
assert "dry_run=True" in guide_content or "dry_run" in guide_content
assert "dry_run=True" in guide_content
def test_guide_mentions_good_vs_bad_output(self, guide_content):
assert "Good output" in guide_content or "good" in guide_content.lower()
assert "Bad output" in guide_content or "bad" in guide_content.lower()
assert "**Good output**" in guide_content
assert "**Bad output**" in guide_content
def test_guide_mentions_max_iterations(self, guide_content):
assert "3 times" in guide_content or "3 iterations" in guide_content
assert "**3 times**" in guide_content
def test_guide_mentions_wait_for_result(self, guide_content):
assert "wait_for_result" in guide_content
assert "wait_for_result=120" in guide_content
def test_guide_mentions_view_agent_output(self, guide_content):
assert "view_agent_output" in guide_content
def test_guide_workflow_has_steps_8_and_9(self, guide_content):
assert "8. **Dry-run**" in guide_content

View File

@@ -248,7 +248,10 @@ user the agent is ready. NEVER skip this step.
and realistic sample inputs that exercise every path in the agent. This
simulates execution using an LLM for each block — no real API calls,
credentials, or credits are consumed.
3. **Inspect output**: Examine the dry-run result for problems:
3. **Inspect output**: Examine the dry-run result for problems. If
`wait_for_result` returns only a summary, call
`view_agent_output(execution_id=..., show_execution_details=True)` to
see the full node-by-node execution trace. Look for:
- **Errors or failed nodes** — a node raised an exception or returned an
error status. Common causes: wrong `source_name`/`sink_name` in links,
missing `input_default` values, or referencing a nonexistent block output.

View File

@@ -63,21 +63,7 @@ Your goal is to help users automate tasks by:
- Building and running working automations
- Delivering tangible value through action, not just explanation
## Agent Development: Create -> Dry-Run -> Fix Loop
When creating or editing an agent, ALWAYS follow this iterative workflow:
1. **Create/Edit** the agent using `create_agent` or `edit_agent`.
2. **Dry-run** the agent immediately after saving by calling `run_agent` with `dry_run=True` and `wait_for_result=120`. Provide realistic sample inputs so every branch of the agent is exercised.
3. **Inspect** the dry-run result. Look for:
- Errors or failed nodes in the execution output
- Missing or incorrectly wired inputs/outputs
- Unexpected null or empty values flowing between nodes
- Nodes that did not execute at all (wiring gaps)
4. **Fix** any issues found by calling `edit_agent` with corrected agent JSON.
5. **Repeat** steps 2-4 until the dry-run produces clean, expected results. Stop after at most 3 fix iterations to avoid excessive looping — if issues persist after 3 attempts, present the remaining problems to the user and ask for guidance.
This loop catches wiring mistakes, missing inputs, and structural issues before the user runs the agent with real data and credentials. NEVER skip the dry-run step after creating or editing an agent.
After creating or editing an agent, ALWAYS dry-run it with `run_agent(dry_run=True, wait_for_result=120)` to verify it works, then fix and re-test (up to 3 iterations). NEVER skip the dry-run step. See tool notes for the full workflow.
Be concise, proactive, and action-oriented. Bias toward showing working solutions over lengthy explanations."""