refactor(benchmark): Refactor & rename functions in agent_interface.py and agent_api_interface.py

- `copy_artifacts_into_temp_folder` -> `copy_challenge_artifacts_into_workspace`
- `copy_agent_artifacts_into_folder` -> `download_agent_artifacts_into_folder`
- Reorder parameters of `run_api_agent` and `copy_challenge_artifacts_into_workspace`; use `Path` instead of `str` for `artifacts_location`
This commit is contained in:
Reinier van der Leer
2024-01-09 16:58:01 +01:00
parent 6a256fef4c
commit 5df2aa7939
4 changed files with 41 additions and 36 deletions

View File

@@ -8,21 +8,20 @@ from agent_protocol_client import AgentApi, ApiClient, Configuration, TaskReques
from agbenchmark.agent_interface import get_list_of_file_paths
from agbenchmark.config import AgentBenchmarkConfig
from agbenchmark.utils.data_types import ChallengeData
LOG = logging.getLogger(__name__)
logger = logging.getLogger(__name__)
async def run_api_agent(
task: ChallengeData,
task: str,
config: AgentBenchmarkConfig,
artifacts_location: str,
timeout: int,
artifacts_location: Optional[Path] = None,
) -> None:
configuration = Configuration(host=config.host)
async with ApiClient(configuration) as api_client:
api_instance = AgentApi(api_client)
task_request_body = TaskRequestBody(input=task.task)
task_request_body = TaskRequestBody(input=task)
start_time = time.time()
response = await api_instance.create_agent_task(
@@ -30,37 +29,36 @@ async def run_api_agent(
)
task_id = response.task_id
await upload_artifacts(
api_instance, artifacts_location, task_id, "artifacts_in"
)
if artifacts_location:
await upload_artifacts(
api_instance, artifacts_location, task_id, "artifacts_in"
)
i = 1
steps_remaining = True
while steps_remaining:
# Read the existing JSON data from the file
while True:
step = await api_instance.execute_agent_task_step(task_id=task_id)
print(f"[{task.name}] - step {step.name} ({i}. request)")
print(f"- step {step.name} ({i}. request)")
i += 1
if time.time() - start_time > timeout:
raise TimeoutError("Time limit exceeded")
if not step or step.is_last:
steps_remaining = False
break
# In "mock" mode, we cheat by giving the correct artifacts to pass the challenge
if os.getenv("IS_MOCK"):
await upload_artifacts(
api_instance, artifacts_location, task_id, "artifacts_out"
if artifacts_location:
# In "mock" mode, we cheat by giving the correct artifacts to pass the test
if os.getenv("IS_MOCK"):
await upload_artifacts(
api_instance, artifacts_location, task_id, "artifacts_out"
)
await download_agent_artifacts_into_folder(
api_instance, task_id, config.temp_folder
)
await copy_agent_artifacts_into_folder(
api_instance, task_id, config.temp_folder
)
async def copy_agent_artifacts_into_folder(
async def download_agent_artifacts_into_folder(
api_instance: AgentApi, task_id: str, folder: Path
):
artifacts = await api_instance.list_agent_task_artifacts(task_id=task_id)
@@ -76,11 +74,10 @@ async def copy_agent_artifacts_into_folder(
folder = (folder / path).parent
if not folder.exists():
LOG.info(f"Creating directory {folder}")
folder.mkdir(parents=True)
file_path = folder / artifact.file_name
LOG.info(f"Writing file {file_path}")
logger.debug(f"Downloading agent artifact {artifact.file_name} to {folder}")
with open(file_path, "wb") as f:
content = await api_instance.download_agent_task_artifact(
task_id=task_id, artifact_id=artifact.artifact_id
@@ -90,7 +87,7 @@ async def copy_agent_artifacts_into_folder(
async def upload_artifacts(
api_instance: AgentApi, artifacts_location: str, task_id: str, type: str
api_instance: AgentApi, artifacts_location: Path, task_id: str, type: str
) -> None:
for file_path in get_list_of_file_paths(artifacts_location, type):
relative_path: Optional[str] = "/".join(

View File

@@ -18,8 +18,8 @@ def get_list_of_file_paths(
return list(source_dir.iterdir())
def copy_artifacts_into_temp_folder(
workspace: str | Path, artifact_folder_name: str, challenge_dir_path: str | Path
def copy_challenge_artifacts_into_workspace(
challenge_dir_path: str | Path, artifact_folder_name: str, workspace: str | Path
) -> None:
file_paths = get_list_of_file_paths(challenge_dir_path, artifact_folder_name)
for file_path in file_paths:

View File

@@ -125,10 +125,10 @@ def stream_output(pipe):
def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
from agbenchmark.agent_api_interface import (
copy_agent_artifacts_into_folder,
download_agent_artifacts_into_folder,
upload_artifacts,
)
from agbenchmark.agent_interface import copy_artifacts_into_temp_folder
from agbenchmark.agent_interface import copy_challenge_artifacts_into_workspace
from agbenchmark.generate_test import create_challenge_from_spec_file
from agbenchmark.main import run_benchmark
@@ -249,7 +249,7 @@ def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
] = task_eval_request.eval_id
await upload_artifacts(
api_instance,
str(CHALLENGES[task_eval_request.eval_id].spec_file.parent),
CHALLENGES[task_eval_request.eval_id].spec_file.parent,
task_response.task_id,
"artifacts_in",
)
@@ -286,10 +286,14 @@ def setup_fastapi_app(agbenchmark_config: AgentBenchmarkConfig) -> FastAPI:
try:
async with ApiClient(configuration) as api_client:
api_instance = AgentApi(api_client)
await copy_agent_artifacts_into_folder(api_instance, task_id, workspace)
await download_agent_artifacts_into_folder(
api_instance, task_id, workspace
)
artifact_path = challenge_info.spec_file.parent
copy_artifacts_into_temp_folder(workspace, "custom_python", artifact_path)
copy_challenge_artifacts_into_workspace(
artifact_path, "custom_python", workspace
)
challenge = create_challenge_from_spec_file(challenge_info.spec_file)
scores = challenge.get_scores(workspace)

View File

@@ -86,7 +86,7 @@ class Challenge(ABC):
assert is_score_100
async def run_challenge(self, config: AgentBenchmarkConfig, cutoff: int) -> None:
from agbenchmark.agent_interface import copy_artifacts_into_temp_folder
from agbenchmark.agent_interface import copy_challenge_artifacts_into_workspace
if not self.data.task:
return
@@ -98,7 +98,9 @@ class Challenge(ABC):
)
print(f"{Fore.BLACK}Task: {self.data.task}{Fore.RESET}")
await run_api_agent(self.data, config, self.ARTIFACTS_LOCATION, cutoff)
await run_api_agent(
self.data.task, config, cutoff, Path(self.ARTIFACTS_LOCATION)
)
# hidden files are added after the agent runs. Hidden files can be python test files.
# We copy them in the temporary folder to make it easy to import the code produced by the agent
@@ -107,7 +109,9 @@ class Challenge(ABC):
str(Path(self.CHALLENGE_LOCATION).parent),
]
for path in artifact_paths:
copy_artifacts_into_temp_folder(config.temp_folder, "custom_python", path)
copy_challenge_artifacts_into_workspace(
path, "custom_python", config.temp_folder
)
@staticmethod
def get_artifacts_out(