Add back api mode (#5172)

This commit is contained in:
merwanehamadi
2023-09-06 22:54:32 -07:00
committed by GitHub
6 changed files with 111 additions and 11 deletions

View File

@@ -240,6 +240,8 @@ jobs:
poetry run uvicorn server:app --reload &
sleep 5
export AGENT_NAME=mini-agi
echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
else
echo "${prefix}agbenchmark start"
${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."

View File

@@ -231,10 +231,13 @@ def generate_tests() -> None: # sourcery skip: invert-any-all
# for suites to know if the file has already been used to generate the tests
# Dynamic class creation
while json_files:
json_file = (
json_files.popleft()
) # Take and remove the first element from json_files
if challenge_should_be_ignored(json_file):
continue
data = ChallengeData.get_json_from_path(json_file)
suite_config = SuiteConfig.suite_data_if_suite(Path(json_file))
@@ -293,4 +296,8 @@ def generate_tests() -> None: # sourcery skip: invert-any-all
print(f"Generated test for {data['name']}.")
def challenge_should_be_ignored(json_file):
    """Return True when the challenge file lives in a directory we skip.

    Deprecated challenges and library challenges are never turned into tests.
    """
    skipped_dirs = ("challenges/deprecated", "challenges/library")
    return any(directory in json_file for directory in skipped_dirs)
generate_tests()

View File

@@ -95,7 +95,8 @@ def run_benchmark(
test: Optional[str] = None,
suite: Optional[str] = None,
cutoff: Optional[int] = None,
server: bool = False,
api_mode: bool = False,
host: Optional[str] = None,
) -> int:
"""Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
# Check if configuration file exists and is not empty
@@ -132,7 +133,12 @@ def run_benchmark(
config = json.load(f)
else:
config = {}
host = host or config.get("host")
api_mode = api_mode or config.get("api_mode")
if host:
config["host"] = host
if api_mode:
config["api_mode"] = api_mode
print("benchmark run path", CONFIG_PATH, HOME_DIRECTORY)
if not config.get("workspace"):
config["workspace"] = click.prompt(
@@ -141,7 +147,7 @@ def run_benchmark(
show_default=True,
)
if config.get("api_mode") and not config.get("host"):
if api_mode and not host:
config["host"] = click.prompt(
"Please enter the Agent API host address",
default="http://localhost:8000",
@@ -195,7 +201,10 @@ def run_benchmark(
elif explore:
print("Only attempt challenges that have never been beaten")
pytest_args.append("--explore")
if host:
pytest_args.append(f"--host={host}")
if api_mode:
pytest_args.append("--api_mode")
if mock:
pytest_args.append("--mock")
@@ -215,6 +224,8 @@ def run_benchmark(
print(f"Setting cuttoff override to {cutoff} seconds.")
pytest_args.extend((str(CURRENT_DIRECTORY), "--cache-clear"))
pytest_args.append("--disable-warnings")
return pytest.main(pytest_args)
@@ -249,6 +260,8 @@ def cli() -> None:
)
@click.option("--nc", is_flag=True, help="Run without cutoff")
@click.option("--cutoff", help="Set or override tests cutoff (seconds)")
@click.option("--api_mode", help="API mode")
@click.option("--host", help="Define API host")
def start(
maintain: bool,
improve: bool,
@@ -262,6 +275,8 @@ def start(
suite: Optional[str] = None,
cutoff: Optional[int] = None,
backend: Optional[bool] = False,
api_mode: bool = False,
host: Optional[str] = None,
) -> Any:
# Redirect stdout if backend is True
original_stdout = sys.stdout # Save the original standard output
@@ -282,6 +297,8 @@ def start(
test=test,
suite=suite,
cutoff=cutoff,
api_mode=api_mode,
host=host,
)
sys.stdout = original_stdout
@@ -404,4 +421,4 @@ def get_regression_data() -> Any:
# if __name__ == "__main__":
# start()
# start()

74
benchmark/poetry.lock generated
View File

@@ -152,6 +152,27 @@ files = [
[package.dependencies]
frozenlist = ">=1.1.0"
[[package]]
name = "anyio"
version = "3.7.1"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
optional = false
python-versions = ">=3.7"
files = [
{file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"},
{file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"},
]
[package.dependencies]
exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
idna = ">=2.8"
sniffio = ">=1.1"
[package.extras]
doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"]
test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
trio = ["trio (<0.22)"]
[[package]]
name = "appnope"
version = "0.1.3"
@@ -617,6 +638,26 @@ files = [
[package.extras]
tests = ["asttokens", "littleutils", "pytest", "rich"]
[[package]]
name = "fastapi"
version = "0.103.1"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
optional = false
python-versions = ">=3.7"
files = [
{file = "fastapi-0.103.1-py3-none-any.whl", hash = "sha256:5e5f17e826dbd9e9b5a5145976c5cd90bcaa61f2bf9a69aca423f2bcebe44d83"},
{file = "fastapi-0.103.1.tar.gz", hash = "sha256:345844e6a82062f06a096684196aaf96c1198b25c06b72c1311b882aa2d8a35d"},
]
[package.dependencies]
anyio = ">=3.7.1,<4.0.0"
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
starlette = ">=0.27.0,<0.28.0"
typing-extensions = ">=4.5.0"
[package.extras]
all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
[[package]]
name = "filelock"
version = "3.12.3"
@@ -2070,6 +2111,20 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]
[[package]]
name = "python-multipart"
version = "0.0.6"
description = "A streaming multipart parser for Python"
optional = false
python-versions = ">=3.7"
files = [
{file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"},
{file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"},
]
[package.extras]
dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"]
[[package]]
name = "pytz"
version = "2023.3.post1"
@@ -2295,6 +2350,23 @@ pure-eval = "*"
[package.extras]
tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
[[package]]
name = "starlette"
version = "0.27.0"
description = "The little ASGI library that shines."
optional = false
python-versions = ">=3.7"
files = [
{file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"},
{file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"},
]
[package.dependencies]
anyio = ">=3.4.0,<5"
[package.extras]
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
[[package]]
name = "tomli"
version = "2.0.1"
@@ -2597,4 +2669,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "c97e1a4310f7d362f157f164b01393eb21fd182e197384c7867cfe002ea46506"
content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e"

View File

@@ -29,6 +29,8 @@ selenium = "^4.11.2"
agent-protocol-client = "^0.2.2"
pytest-asyncio = "^0.21.1"
uvicorn = "^0.23.2"
fastapi = "^0.103.1"
python-multipart = "^0.0.6"
[tool.poetry.group.dev.dependencies]

View File

@@ -17,10 +17,6 @@ logger = logging.getLogger(__name__)
app = FastAPI()
artifacts: List[Dict[str, Any]] = []
with open("agent/gpt-engineer/agbenchmark/config.json", "r") as file:
config = json.load(file)
logger.info("Loaded configuration")
class Task(BaseModel):
input: str
@@ -34,7 +30,11 @@ async def upload_file(
"Uploading file for task_id: %s with relative path: %s", task_id, relative_path
)
absolute_directory_path = Path(__file__).parent.absolute()
save_path = absolute_directory_path / "agent/gpt-engineer" / config["workspace"]
save_path = (
absolute_directory_path
/ "agent/gpt-engineer"
/ "projects/my-new-project/workspace"
)
random_string = str(randint(0, 100000))
while random_string in artifacts: