diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000000..6d308befa7 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,10 @@ +{ + "permissions": { + "allowedTools": [ + "Read", "Grep", "Glob", + "Bash(ls:*)", "Bash(cat:*)", "Bash(grep:*)", "Bash(find:*)", + "Bash(git status:*)", "Bash(git diff:*)", "Bash(git log:*)", "Bash(git worktree:*)", + "Bash(tmux:*)", "Bash(sleep:*)", "Bash(branchlet:*)" + ] + } +} diff --git a/.github/workflows/classic-forge-ci.yml b/.github/workflows/classic-forge-ci.yml index 07ceec4f90..fa0aa41981 100644 --- a/.github/workflows/classic-forge-ci.yml +++ b/.github/workflows/classic-forge-ci.yml @@ -55,6 +55,9 @@ jobs: - name: Install Python dependencies run: poetry install + - name: Install Playwright browsers + run: poetry run playwright install chromium + - name: Run pytest with coverage run: | poetry run pytest -vv \ diff --git a/classic/benchmark/.env.example b/classic/benchmark/.env.example new file mode 100644 index 0000000000..e4fb03486f --- /dev/null +++ b/classic/benchmark/.env.example @@ -0,0 +1,4 @@ +AGENT_NAME=mini-agi +REPORTS_FOLDER="reports/mini-agi" +OPENAI_API_KEY="sk-" # for LLM eval +BUILD_SKILL_TREE=false # set to true to build the skill tree. diff --git a/classic/benchmark/frontend/.env.example b/classic/benchmark/frontend/.env.example new file mode 100644 index 0000000000..168cf5b05c --- /dev/null +++ b/classic/benchmark/frontend/.env.example @@ -0,0 +1,14 @@ +# Since the ".env" file is gitignored, you can use the ".env.example" file to +# build a new ".env" file when you clone the repo. Keep this file up-to-date +# when you add new variables to `.env`. + +# This file will be committed to version control, so make sure not to have any +# secrets in it. If you are cloning this repo, create a copy of this file named +# ".env" and populate it with your secrets. + +# When adding additional environment variables, the schema in "/src/env.mjs" +# should be updated accordingly. 
def get_default_model() -> str:
    """Pick the model preset to use when no ``--models`` value is given.

    Provider API keys are looked up in the environment in priority order
    (Anthropic, then OpenAI, then Groq). The preset belonging to the first
    key that is set to a non-empty value wins.

    Returns:
        ``"claude"``, ``"openai"`` or ``"groq"``. When none of the keys is
        set, ``"openai"`` is returned.
    """
    # Ordered from most to least preferred provider.
    preset_by_env_var = (
        ("ANTHROPIC_API_KEY", "claude"),
        ("OPENAI_API_KEY", "openai"),
        ("GROQ_API_KEY", "groq"),
    )
    for env_var, preset in preset_by_env_var:
        # An unset variable and an empty string are both treated as absent.
        if os.environ.get(env_var):
            return preset
    # No key found: default to openai (most commonly available in CI).
    return "openai"