feat(classic): add noninteractive mode env var and benchmark config logging

- Add NONINTERACTIVE_MODE env var support to AppConfig for disabling
  user interaction during automated runs
- Benchmark harness now sets NONINTERACTIVE_MODE=True when starting agents
- Add agent configuration logging at server startup (model, strategy, etc.)
- Harness logs env vars being passed to agent for verification
- Add --agent-output flag to show full agent server output for debugging

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Author: Nicholas Tindle
Date: 2026-01-19 19:40:24 -06:00
Parent commit: 32f68d5999
Commit: acb320d32d
3 changed files with 52 additions and 16 deletions

View File

@@ -380,11 +380,13 @@ def start_agent(
strategy: str,
model_config: ModelConfig,
port: int = 8000,
show_agent_output: bool = False,
) -> subprocess.Popen:
"""Start the AutoGPT agent with a specific strategy and model config."""
env = os.environ.copy()
env["PROMPT_STRATEGY"] = strategy
env["AP_SERVER_PORT"] = str(port)
env["NONINTERACTIVE_MODE"] = "True"
# Set model configuration if specified
model_env = model_config.to_env()
@@ -394,14 +396,14 @@ def start_agent(
model_desc = f" with {model_config.name}" if model_config.name != "default" else ""
log(f"Starting agent with strategy '{strategy}'{model_desc} on port {port}...")
if model_config.smart_llm:
log(f" Smart LLM: {model_config.smart_llm}")
if model_config.fast_llm:
log(f" Fast LLM: {model_config.fast_llm}")
log(f" PROMPT_STRATEGY: {env['PROMPT_STRATEGY']}")
log(f" NONINTERACTIVE_MODE: {env.get('NONINTERACTIVE_MODE', 'not set')}")
log(f" SMART_LLM: {env.get('SMART_LLM', '(env default)')}")
log(f" FAST_LLM: {env.get('FAST_LLM', '(env default)')}")
if model_config.thinking_budget_tokens:
log(f" Thinking Budget: {model_config.thinking_budget_tokens} tokens")
log(f" THINKING_BUDGET: {model_config.thinking_budget_tokens} tokens")
if model_config.reasoning_effort:
log(f" Reasoning Effort: {model_config.reasoning_effort}")
log(f" REASONING_EFFORT: {model_config.reasoning_effort}")
# Start the agent server (port is set via AP_SERVER_PORT env var)
proc = subprocess.Popen(
@@ -410,12 +412,26 @@ def start_agent(
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=Path(__file__).parent.parent,
text=True,
bufsize=1, # Line buffered
)
# Wait for agent to be ready with progress indicator
log_progress(" Waiting for agent to be ready")
# Wait for agent to be ready, streaming output
import select
import threading
# Thread to read and print agent output
def stream_output():
if proc.stdout:
for line in proc.stdout:
if show_agent_output:
print(f" [agent] {line.rstrip()}", flush=True)
output_thread = threading.Thread(target=stream_output, daemon=True)
output_thread.start()
log(" Waiting for agent to be ready...")
start_time = time.time()
check_count = 0
while time.time() - start_time < AGENT_STARTUP_TIMEOUT:
try:
import urllib.request
@@ -423,17 +439,12 @@ def start_agent(
urllib.request.urlopen(
f"http://localhost:{port}/ap/v1/agent/tasks", timeout=2
)
print() # Newline after dots
elapsed = time.time() - start_time
log(f"Agent ready on port {port} (took {elapsed:.1f}s)")
return proc
except Exception:
check_count += 1
if check_count % 2 == 0: # Print dot every second
print(".", end="", flush=True)
time.sleep(0.5)
print() # Newline after dots
proc.kill()
raise TimeoutError(f"Agent failed to start within {AGENT_STARTUP_TIMEOUT}s")
@@ -789,6 +800,7 @@ def run_benchmark_config(
tests: Optional[list[str]],
attempts: int,
verbose: bool = True,
show_agent_output: bool = False,
) -> Optional[BenchmarkResult]:
"""Run benchmark for a single strategy and model configuration."""
config_name = (
@@ -804,7 +816,7 @@ def run_benchmark_config(
agent_proc = None
try:
# Start agent
agent_proc = start_agent(strategy, model_config, port)
agent_proc = start_agent(strategy, model_config, port, show_agent_output)
# Run benchmark
report_dir = run_benchmark(
@@ -1048,6 +1060,11 @@ def main():
action="store_true",
help="Suppress benchmark output (only show summary)",
)
parser.add_argument(
"--agent-output",
action="store_true",
help="Show agent server output (useful for debugging config)",
)
args = parser.parse_args()
verbose = not args.quiet
@@ -1227,6 +1244,7 @@ def main():
tests=tests,
attempts=args.attempts,
verbose=verbose,
show_agent_output=args.agent_output,
)
if result:
results[config_name] = result

View File

@@ -43,7 +43,9 @@ class AppConfig(BaseConfig):
skip_reprompt: bool = False
authorise_key: str = UserConfigurable(default="y", from_env="AUTHORISE_COMMAND_KEY")
exit_key: str = UserConfigurable(default="n", from_env="EXIT_KEY")
noninteractive_mode: bool = False
noninteractive_mode: bool = UserConfigurable(
default=False, from_env="NONINTERACTIVE_MODE"
)
logging: LoggingConfig = LoggingConfig()
component_config_file: Optional[Path] = UserConfigurable(
default=None, from_env="COMPONENT_CONFIG_FILE"

View File

@@ -529,6 +529,22 @@ async def run_auto_gpt_server(
tts_config=config.tts_config,
)
# Log configuration for debugging/verification
logger = logging.getLogger(__name__)
logger.info("=" * 60)
logger.info("AGENT CONFIGURATION")
logger.info("=" * 60)
logger.info(f" Smart LLM: {config.smart_llm}")
logger.info(f" Fast LLM: {config.fast_llm}")
logger.info(f" Prompt Strategy: {config.prompt_strategy}")
logger.info(f" Temperature: {config.temperature}")
logger.info(f" Noninteractive: {config.noninteractive_mode}")
if config.thinking_budget_tokens:
logger.info(f" Thinking Budget: {config.thinking_budget_tokens} tokens")
if config.reasoning_effort:
logger.info(f" Reasoning Effort: {config.reasoning_effort}")
logger.info("=" * 60)
await assert_config_has_required_llm_api_keys(config)
await apply_overrides_to_config(