Add agent protocol interface test (#259)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
2026-04-08 03:00:28 -04:00 · 2023-08-05 18:00:05 -07:00
parent 13d2dcbf5e
commit 530eb61f25
5 changed files with 49 additions and 47 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -170,8 +170,7 @@ jobs:
          elif [ "$AGENT_NAME" == "beebot" ]; then
            poetry install
            poetry run playwright install
-            uvicorn beebot.initiator.api:create_app --factory --reload  --timeout-graceful-shutdown=3 --timeout-keep-alive=300 &
-            agent-protocol test --url=127.0.0.1:8000 || echo "Beebot is not compliant with the agent protocol"
+            uvicorn beebot.initiator.api:create_app --reload &
            prefix="poetry run "
          else
            echo "Unknown agent name: $AGENT_NAME"
--- a/agbenchmark/agent_interface.py
+++ b/agbenchmark/agent_interface.py
@@ -20,56 +20,57 @@ def run_agent(
    task: str, config: Dict[str, Any], artifacts_location: str, cutoff: int
 ) -> None:
    """Calling to get a response"""
-
+    if task == "":
+        return
    if MOCK_FLAG:
        print("Running mock agent")
        copy_artifacts_into_workspace(
            config["workspace"], "artifacts_out", artifacts_location
        )
+        return
+    entry_path = "agbenchmark.benchmarks"
+
+    timeout = cutoff
+    if "--nc" in sys.argv:
+        timeout = 100000
+
+    print(f"Running '{entry_path}' with timeout {timeout}")
+
+    command = [sys.executable, "-m", entry_path, str(task)]
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        universal_newlines=True,
+        cwd=HOME_DIRECTORY,
+        bufsize=1,
+    )
+
+    start_time = time.time()
+
+    while True:
+        try:
+            # This checks if there's data to be read from stdout without blocking.
+            if process.stdout and select.select([process.stdout], [], [], 0)[0]:
+                output = process.stdout.readline()
+                print(output.strip())
+        except Exception as e:
+            continue
+
+        # Stop polling once the process has exited or the timeout has elapsed
+        if process.poll() is not None or (time.time() - start_time > timeout):
+            break
+
+    if time.time() - start_time > timeout:
+        print("The Python function has exceeded the time limit and was terminated.")
+        process.kill()
    else:
-        entry_path = "agbenchmark.benchmarks"
+        print("The Python function has finished running.")

-        timeout = cutoff
-        if "--nc" in sys.argv:
-            timeout = 100000
+    process.wait()

-        print(f"Running '{entry_path}' with timeout {timeout}")
-
-        command = [sys.executable, "-m", entry_path, str(task)]
-        process = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.STDOUT,
-            universal_newlines=True,
-            cwd=HOME_DIRECTORY,
-            bufsize=1,
-        )
-
-        start_time = time.time()
-
-        while True:
-            try:
-                # This checks if there's data to be read from stdout without blocking.
-                if process.stdout and select.select([process.stdout], [], [], 0)[0]:
-                    output = process.stdout.readline()
-                    print(output.strip())
-            except Exception as e:
-                continue
-
-            # Check if process has ended, has no more output, or exceeded timeout
-            if process.poll() is not None or (time.time() - start_time > timeout):
-                break
-
-        if time.time() - start_time > timeout:
-            print("The Python function has exceeded the time limit and was terminated.")
-            process.kill()
-        else:
-            print("The Python function has finished running.")
-
-        process.wait()
-
-        if process.returncode != 0:
-            print(f"The agent timed out")
+    if process.returncode != 0:
+        print(f"The agent timed out")


 def copy_artifacts_into_workspace(
--- a/agbenchmark/challenges
+++ b/agbenchmark/challenges
--- a/agbenchmark/utils/challenge.py
+++ b/agbenchmark/utils/challenge.py
@@ -177,7 +177,9 @@ class Challenge(ABC):
        percentage = None

        try:
-            if isinstance(self.data.ground, Ground):
+            if self.data.task == "" and MOCK_FLAG:
+                scores = [1.0]
+            elif isinstance(self.data.ground, Ground):
                files_contents = self.get_artifacts_out(
                    config["workspace"], self.data.ground
                )
--- a/agent/beebot
+++ b/agent/beebot