mirror of
https://github.com/microsoft/autogen.git
synced 2026-02-17 12:41:32 -05:00
* Prints the version of AutoGenBench from the command line, closing #1458 * Added autogenbench version to timestamp.txt * Attempting to fix formatting. * Add a gitignore for autogenbench * Generalize to read all template dirs from Templates * AutoGenBench logs telemetry when available. * Remove spaces if present from template names. * Bump version. * Fixed formatting. * Allow native warning to be skipped. Mount autogen repo in Docker if it can be found (experimental). * Native execution now occurs in a venv. * Bump version. * Fixed a prompt escaping bug evident in GAIA task '6f37996b-2ac7-44b0-8e68-6d28256631b4' * Updated all scenarios to use template discovery. * Update with main version of runtime_logging. --------- Co-authored-by: gagb <gagb@users.noreply.github.com>
105 lines
3.1 KiB
Python
105 lines
3.1 KiB
Python
import sys
|
|
from .version import __version__
|
|
from .run_cmd import run_cli
|
|
from .clone_cmd import clone_cli
|
|
from .tabulate_cmd import tabulate_cli
|
|
|
|
|
|
def main(args=None):
|
|
if args is None:
|
|
args = sys.argv[:] # Shallow copy
|
|
|
|
invocation_cmd = "autogenbench"
|
|
version_string = f"AutoGenBench version {__version__}"
|
|
|
|
commands = [
|
|
{
|
|
"command": "clone",
|
|
"description": "download and expand a benchmark",
|
|
"function": clone_cli,
|
|
},
|
|
{
|
|
"command": "run",
|
|
"description": "run a given benchmark configuration",
|
|
"function": run_cli,
|
|
},
|
|
{
|
|
"command": "tabulate",
|
|
"description": "tabulate the results of a previous run",
|
|
"function": tabulate_cli,
|
|
},
|
|
{
|
|
"command": "--version",
|
|
"description": f"print the version of {invocation_cmd}",
|
|
"function": lambda _args: print(f"{version_string}"),
|
|
},
|
|
{"command": "--help", "description": "print this message", "function": None},
|
|
]
|
|
|
|
# Some help string formatting
|
|
commands_list = ", ".join(["'" + c["command"] + "'" for c in commands])
|
|
max_command_len = max([len(c["command"]) for c in commands])
|
|
commands_details = ""
|
|
for c in commands:
|
|
padded_cmd = c["command"]
|
|
while len(padded_cmd) < max_command_len:
|
|
padded_cmd = " " + padded_cmd
|
|
commands_details += f" {padded_cmd}: {c['description']}\n"
|
|
|
|
usage_text = f"""
|
|
{version_string}
|
|
|
|
usage: {invocation_cmd} COMMAND ARGS
|
|
|
|
Where, COMMAND is one of: {commands_list}
|
|
|
|
and ARGS are specific to the command.
|
|
(use '{invocation_cmd} COMMAND --help' for command-specific help)
|
|
""".strip()
|
|
|
|
help_text = f"""
|
|
{version_string}
|
|
|
|
usage: {invocation_cmd} COMMAND ARGS
|
|
|
|
{invocation_cmd} is a tool for running and managing AutoGen benchmark scenarios. A typically session might resemble:
|
|
|
|
{invocation_cmd} clone HumanEval
|
|
cd HumanEval
|
|
{invocation_cmd} run Tasks/human_eval_two_agents_gpt4.jsonl
|
|
|
|
which will download the HumanEval benchmark, expand it, and then run the benchmark once with the `human_eval_two_agents_gpt4` configuration.
|
|
|
|
Available COMMANDs include:
|
|
|
|
{commands_details}
|
|
|
|
Additionally, you can use the --help option with any command for further command-specific instructions. E.g.,
|
|
|
|
{invocation_cmd} run --help
|
|
{invocation_cmd} clone --help
|
|
|
|
""".strip()
|
|
|
|
if len(args) < 2:
|
|
sys.stderr.write(usage_text + "\n")
|
|
sys.exit(2)
|
|
|
|
for command in commands:
|
|
if args[1].lower() == command["command"]:
|
|
if command["function"] is None:
|
|
sys.stderr.write(help_text + "\n")
|
|
sys.exit(0)
|
|
else:
|
|
command["function"]([invocation_cmd + " " + command["command"]] + args[2:])
|
|
sys.exit(0)
|
|
|
|
# Command not found
|
|
sys.stderr.write(f"Invalid command '{args[1]}'. Available commands include: {commands_list}\n")
|
|
sys.exit(2)
|
|
|
|
|
|
###############################################################################
|
|
if __name__ == "__main__":
|
|
main()
|