refactor(benchmark): Refactor & rename functions in agent_interface.py and agent_api_interface.py

- `copy_artifacts_into_temp_folder` -> `copy_challenge_artifacts_into_workspace`
- `copy_agent_artifacts_into_folder` -> `download_agent_artifacts_into_folder`
- Reorder parameters of `run_api_agent` and `copy_challenge_artifacts_into_workspace`; use `Path` instead of `str`
2026-04-08 03:00:28 -04:00 · 2024-01-09 16:58:01 +01:00
parent 6a256fef4c
commit 5df2aa7939
4 changed files with 41 additions and 36 deletions
--- a/benchmark/agbenchmark/agent_api_interface.py
+++ b/benchmark/agbenchmark/agent_api_interface.py
@@ -8,21 +8,20 @@ from agent_protocol_client import AgentApi, ApiClient, Configuration, TaskReques

 from agbenchmark.agent_interface import get_list_of_file_paths
 from agbenchmark.config import AgentBenchmarkConfig
-from agbenchmark.utils.data_types import ChallengeData

-LOG = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)


 async def run_api_agent(
-    task: ChallengeData,
+    task: str,
    config: AgentBenchmarkConfig,
-    artifacts_location: str,
    timeout: int,
+    artifacts_location: Optional[Path] = None,
 ) -> None:
    configuration = Configuration(host=config.host)
    async with ApiClient(configuration) as api_client:
        api_instance = AgentApi(api_client)
-        task_request_body = TaskRequestBody(input=task.task)
+        task_request_body = TaskRequestBody(input=task)

        start_time = time.time()
        response = await api_instance.create_agent_task(
@@ -30,37 +29,36 @@ async def run_api_agent(
        )
        task_id = response.task_id

-        await upload_artifacts(
-            api_instance, artifacts_location, task_id, "artifacts_in"
-        )
+        if artifacts_location:
+            await upload_artifacts(
+                api_instance, artifacts_location, task_id, "artifacts_in"
+            )

        i = 1
-        steps_remaining = True
-        while steps_remaining:
-            # Read the existing JSON data from the file
-
+        while True:
            step = await api_instance.execute_agent_task_step(task_id=task_id)

-            print(f"[{task.name}] - step {step.name} ({i}. request)")
+            print(f"- step {step.name} ({i}. request)")
            i += 1

            if time.time() - start_time > timeout:
                raise TimeoutError("Time limit exceeded")
            if not step or step.is_last:
-                steps_remaining = False
+                break

-        # In "mock" mode, we cheat by giving the correct artifacts to pass the challenge
-        if os.getenv("IS_MOCK"):
-            await upload_artifacts(
-                api_instance, artifacts_location, task_id, "artifacts_out"
+        if artifacts_location:
+            # In "mock" mode, we cheat by giving the correct artifacts to pass the test
+            if os.getenv("IS_MOCK"):
+                await upload_artifacts(
+                    api_instance, artifacts_location, task_id, "artifacts_out"
+                )
+
+            await download_agent_artifacts_into_folder(
+                api_instance, task_id, config.temp_folder
            )

-        await copy_agent_artifacts_into_folder(
-            api_instance, task_id, config.temp_folder
-        )

-
-async def copy_agent_artifacts_into_folder(
+async def download_agent_artifacts_into_folder(
    api_instance: AgentApi, task_id: str, folder: Path
 ):
    artifacts = await api_instance.list_agent_task_artifacts(task_id=task_id)
@@ -76,11 +74,10 @@ async def copy_agent_artifacts_into_folder(
            folder = (folder / path).parent

        if not folder.exists():
-            LOG.info(f"Creating directory {folder}")
            folder.mkdir(parents=True)

        file_path = folder / artifact.file_name
-        LOG.info(f"Writing file {file_path}")
+        logger.debug(f"Downloading agent artifact {artifact.file_name} to {folder}")
        with open(file_path, "wb") as f:
            content = await api_instance.download_agent_task_artifact(
                task_id=task_id, artifact_id=artifact.artifact_id
@@ -90,7 +87,7 @@ async def copy_agent_artifacts_into_folder(


 async def upload_artifacts(
-    api_instance: AgentApi, artifacts_location: str, task_id: str, type: str
+    api_instance: AgentApi, artifacts_location: Path, task_id: str, type: str
 ) -> None:
    for file_path in get_list_of_file_paths(artifacts_location, type):
        relative_path: Optional[str] = "/".join(
--- a/benchmark/agbenchmark/agent_interface.py
+++ b/benchmark/agbenchmark/agent_interface.py
@@ -18,8 +18,8 @@ def get_list_of_file_paths(
    return list(source_dir.iterdir())


-def copy_artifacts_into_temp_folder(
-    workspace: str | Path, artifact_folder_name: str, challenge_dir_path: str | Path
+def copy_challenge_artifacts_into_workspace(
+    challenge_dir_path: str | Path, artifact_folder_name: str, workspace: str | Path
 ) -> None:
    file_paths = get_list_of_file_paths(challenge_dir_path, artifact_folder_name)
    for file_path in file_paths:
--- a/benchmark/agbenchmark/app.py
+++ b/benchmark/agbenchmark/app.py
@@ -125,10 +125,10 @@ def stream_output(pipe):

 def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
    from agbenchmark.agent_api_interface import (
-        copy_agent_artifacts_into_folder,
+        download_agent_artifacts_into_folder,
        upload_artifacts,
    )
-    from agbenchmark.agent_interface import copy_artifacts_into_temp_folder
+    from agbenchmark.agent_interface import copy_challenge_artifacts_into_workspace
    from agbenchmark.generate_test import create_challenge_from_spec_file
    from agbenchmark.main import run_benchmark

@@ -249,7 +249,7 @@ def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
                ] = task_eval_request.eval_id
                await upload_artifacts(
                    api_instance,
-                    str(CHALLENGES[task_eval_request.eval_id].spec_file.parent),
+                    CHALLENGES[task_eval_request.eval_id].spec_file.parent,
                    task_response.task_id,
                    "artifacts_in",
                )
@@ -286,10 +286,14 @@ def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
        try:
            async with ApiClient(configuration) as api_client:
                api_instance = AgentApi(api_client)
-                await copy_agent_artifacts_into_folder(api_instance, task_id, workspace)
+                await download_agent_artifacts_into_folder(
+                    api_instance, task_id, workspace
+                )

            artifact_path = challenge_info.spec_file.parent
-            copy_artifacts_into_temp_folder(workspace, "custom_python", artifact_path)
+            copy_challenge_artifacts_into_workspace(
+                artifact_path, "custom_python", workspace
+            )

            challenge = create_challenge_from_spec_file(challenge_info.spec_file)
            scores = challenge.get_scores(workspace)
--- a/benchmark/agbenchmark/utils/challenge.py
+++ b/benchmark/agbenchmark/utils/challenge.py
@@ -86,7 +86,7 @@ class Challenge(ABC):
        assert is_score_100

    async def run_challenge(self, config: AgentBenchmarkConfig, cutoff: int) -> None:
-        from agbenchmark.agent_interface import copy_artifacts_into_temp_folder
+        from agbenchmark.agent_interface import copy_challenge_artifacts_into_workspace

        if not self.data.task:
            return
@@ -98,7 +98,9 @@ class Challenge(ABC):
        )
        print(f"{Fore.BLACK}Task: {self.data.task}{Fore.RESET}")

-        await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff)
+        await run_api_agent(
+            self.data.task, config, cutoff, Path(self.ARTIFACTS_LOCATION)
+        )

        # hidden files are added after the agent runs. Hidden files can be python test files.
        # We copy them in the temporary folder to make it easy to import the code produced by the agent
@@ -107,7 +109,9 @@ class Challenge(ABC):
            str(Path(self.CHALLENGE_LOCATION).parent),
        ]
        for path in artifact_paths:
-            copy_artifacts_into_temp_folder(config.temp_folder, "custom_python", path)
+            copy_challenge_artifacts_into_workspace(
+                path, "custom_python", config.temp_folder
+            )

    @staticmethod
    def get_artifacts_out(