feat(classic): add noninteractive mode env var and benchmark config logging

- Add NONINTERACTIVE_MODE env var support to AppConfig for disabling
  user interaction during automated runs
- Benchmark harness now sets NONINTERACTIVE_MODE=True when starting agents
- Add agent configuration logging at server startup (model, strategy, etc.)
- Harness logs env vars being passed to agent for verification
- Add --agent-output flag to show full agent server output for debugging

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Author: Nicholas Tindle
Date: 2026-01-19 19:40:24 -06:00
Parent commit: 32f68d5999
Commit: acb320d32d
3 changed files with 52 additions and 16 deletions

View File

@@ -380,11 +380,13 @@ def start_agent(
strategy: str,
model_config: ModelConfig,
port: int = 8000,
show_agent_output: bool = False,
) -> subprocess.Popen:
"""Start the AutoGPT agent with a specific strategy and model config."""
env = os.environ.copy()
env["PROMPT_STRATEGY"] = strategy
env["AP_SERVER_PORT"] = str(port)
env["NONINTERACTIVE_MODE"] = "True"
# Set model configuration if specified
model_env = model_config.to_env()
@@ -394,14 +396,14 @@ def start_agent(
model_desc = f" with {model_config.name}" if model_config.name != "default" else ""
log(f"Starting agent with strategy '{strategy}'{model_desc} on port {port}...")
if model_config.smart_llm:
log(f" Smart LLM: {model_config.smart_llm}")
if model_config.fast_llm:
log(f" Fast LLM: {model_config.fast_llm}")
log(f" PROMPT_STRATEGY: {env['PROMPT_STRATEGY']}")
log(f" NONINTERACTIVE_MODE: {env.get('NONINTERACTIVE_MODE', 'not set')}")
log(f" SMART_LLM: {env.get('SMART_LLM', '(env default)')}")
log(f" FAST_LLM: {env.get('FAST_LLM', '(env default)')}")
if model_config.thinking_budget_tokens:
log(f" Thinking Budget: {model_config.thinking_budget_tokens} tokens")
log(f" THINKING_BUDGET: {model_config.thinking_budget_tokens} tokens")
if model_config.reasoning_effort:
log(f" Reasoning Effort: {model_config.reasoning_effort}")
log(f" REASONING_EFFORT: {model_config.reasoning_effort}")
# Start the agent server (port is set via AP_SERVER_PORT env var)
proc = subprocess.Popen(
@@ -410,12 +412,26 @@ def start_agent(
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
cwd=Path(__file__).parent.parent,
text=True,
bufsize=1, # Line buffered
)
# Wait for agent to be ready with progress indicator
log_progress(" Waiting for agent to be ready")
# Wait for agent to be ready, streaming output
import select
import threading
# Thread to read and print agent output
def stream_output():
if proc.stdout:
for line in proc.stdout:
if show_agent_output:
print(f" [agent] {line.rstrip()}", flush=True)
output_thread = threading.Thread(target=stream_output, daemon=True)
output_thread.start()
log(" Waiting for agent to be ready...")
start_time = time.time()
check_count = 0
while time.time() - start_time < AGENT_STARTUP_TIMEOUT:
try:
import urllib.request
@@ -423,17 +439,12 @@ def start_agent(
urllib.request.urlopen(
f"http://localhost:{port}/ap/v1/agent/tasks", timeout=2
)
print() # Newline after dots
elapsed = time.time() - start_time
log(f"Agent ready on port {port} (took {elapsed:.1f}s)")
return proc
except Exception:
check_count += 1
if check_count % 2 == 0: # Print dot every second
print(".", end="", flush=True)
time.sleep(0.5)
print() # Newline after dots
proc.kill()
raise TimeoutError(f"Agent failed to start within {AGENT_STARTUP_TIMEOUT}s")
@@ -789,6 +800,7 @@ def run_benchmark_config(
tests: Optional[list[str]],
attempts: int,
verbose: bool = True,
show_agent_output: bool = False,
) -> Optional[BenchmarkResult]:
"""Run benchmark for a single strategy and model configuration."""
config_name = (
@@ -804,7 +816,7 @@ def run_benchmark_config(
agent_proc = None
try:
# Start agent
agent_proc = start_agent(strategy, model_config, port)
agent_proc = start_agent(strategy, model_config, port, show_agent_output)
# Run benchmark
report_dir = run_benchmark(
@@ -1048,6 +1060,11 @@ def main():
action="store_true",
help="Suppress benchmark output (only show summary)",
)
parser.add_argument(
"--agent-output",
action="store_true",
help="Show agent server output (useful for debugging config)",
)
args = parser.parse_args()
verbose = not args.quiet
@@ -1227,6 +1244,7 @@ def main():
tests=tests,
attempts=args.attempts,
verbose=verbose,
show_agent_output=args.agent_output,
)
if result:
results[config_name] = result

View File

@@ -43,7 +43,9 @@ class AppConfig(BaseConfig):
skip_reprompt: bool = False
authorise_key: str = UserConfigurable(default="y", from_env="AUTHORISE_COMMAND_KEY")
exit_key: str = UserConfigurable(default="n", from_env="EXIT_KEY")
noninteractive_mode: bool = False
noninteractive_mode: bool = UserConfigurable(
default=False, from_env="NONINTERACTIVE_MODE"
)
logging: LoggingConfig = LoggingConfig()
component_config_file: Optional[Path] = UserConfigurable(
default=None, from_env="COMPONENT_CONFIG_FILE"

View File

@@ -529,6 +529,22 @@ async def run_auto_gpt_server(
tts_config=config.tts_config,
)
# Log configuration for debugging/verification
logger = logging.getLogger(__name__)
logger.info("=" * 60)
logger.info("AGENT CONFIGURATION")
logger.info("=" * 60)
logger.info(f" Smart LLM: {config.smart_llm}")
logger.info(f" Fast LLM: {config.fast_llm}")
logger.info(f" Prompt Strategy: {config.prompt_strategy}")
logger.info(f" Temperature: {config.temperature}")
logger.info(f" Noninteractive: {config.noninteractive_mode}")
if config.thinking_budget_tokens:
logger.info(f" Thinking Budget: {config.thinking_budget_tokens} tokens")
if config.reasoning_effort:
logger.info(f" Reasoning Effort: {config.reasoning_effort}")
logger.info("=" * 60)
await assert_config_has_required_llm_api_keys(config)
await apply_overrides_to_config(