Merge branch 'dev' of github.com:Significant-Gravitas/AutoGPT into fix/copilot-tool-output-e2b-bridging

This commit is contained in:
Zamil Majdy
2026-04-02 18:16:25 +02:00
4 changed files with 205 additions and 3 deletions

View File

@@ -59,6 +59,16 @@ _null_cache: TTLCache[tuple[str, str], bool] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
)
# GitHub user identity caches (keyed by user_id only, not provider tuple).
# Declared here so invalidate_user_provider_cache() can reference them.
_GH_IDENTITY_CACHE_TTL = 600.0 # 10 min — profile data rarely changes
_gh_identity_cache: TTLCache[str, dict[str, str]] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_GH_IDENTITY_CACHE_TTL
)
_gh_identity_null_cache: TTLCache[str, bool] = TTLCache(
maxsize=_CACHE_MAX_SIZE, ttl=_NULL_CACHE_TTL
)
def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
"""Remove the cached entry for *user_id*/*provider* from both caches.
@@ -66,11 +76,19 @@ def invalidate_user_provider_cache(user_id: str, provider: str) -> None:
Call this after storing new credentials so that the next
``get_provider_token()`` call performs a fresh DB lookup instead of
serving a stale TTL-cached result.
For GitHub specifically, also clears the git-identity caches so that
``get_github_user_git_identity()`` re-fetches the user's profile on
the next call instead of serving stale identity data.
"""
key = (user_id, provider)
_token_cache.pop(key, None)
_null_cache.pop(key, None)
if provider == "github":
_gh_identity_cache.pop(user_id, None)
_gh_identity_null_cache.pop(user_id, None)
# Register this module's cache-bust function with the credentials manager so
# that any create/update/delete operation immediately evicts stale cache
@@ -177,3 +195,76 @@ async def get_integration_env_vars(user_id: str) -> dict[str, str]:
for var in var_names:
env[var] = token
return env
# ---------------------------------------------------------------------------
# GitHub user identity (for git committer env vars)
# ---------------------------------------------------------------------------
async def get_github_user_git_identity(user_id: str) -> dict[str, str] | None:
"""Fetch the GitHub user's name and email for git committer env vars.
Uses the ``/user`` GitHub API endpoint with the user's stored token.
Returns a dict with ``GIT_AUTHOR_NAME``, ``GIT_AUTHOR_EMAIL``,
``GIT_COMMITTER_NAME``, and ``GIT_COMMITTER_EMAIL`` if the user has a
connected GitHub account. Returns ``None`` otherwise.
Results are cached for 10 minutes; "not connected" results are cached for
60 s (same as null-token cache).
"""
if user_id in _gh_identity_null_cache:
return None
if cached := _gh_identity_cache.get(user_id):
return cached
token = await get_provider_token(user_id, "github")
if not token:
_gh_identity_null_cache[user_id] = True
return None
import aiohttp
try:
async with aiohttp.ClientSession() as session:
async with session.get(
"https://api.github.com/user",
headers={
"Authorization": f"token {token}",
"Accept": "application/vnd.github+json",
},
timeout=aiohttp.ClientTimeout(total=5),
) as resp:
if resp.status != 200:
logger.warning(
"[git-identity] GitHub /user returned %s for user %s",
resp.status,
user_id,
)
return None
data = await resp.json()
except Exception as exc:
logger.warning(
"[git-identity] Failed to fetch GitHub profile for user %s: %s",
user_id,
exc,
)
return None
name = data.get("name") or data.get("login") or "AutoGPT User"
# GitHub may return email=null if the user has set their email to private.
# Fall back to the noreply address GitHub generates for every account.
email = data.get("email")
if not email:
gh_id = data.get("id", "")
login = data.get("login", "user")
email = f"{gh_id}+{login}@users.noreply.github.com"
identity = {
"GIT_AUTHOR_NAME": name,
"GIT_AUTHOR_EMAIL": email,
"GIT_COMMITTER_NAME": name,
"GIT_COMMITTER_EMAIL": email,
}
_gh_identity_cache[user_id] = identity
return identity

View File

@@ -9,6 +9,8 @@ from backend.copilot.integration_creds import (
_NULL_CACHE_TTL,
_TOKEN_CACHE_TTL,
PROVIDER_ENV_VARS,
_gh_identity_cache,
_gh_identity_null_cache,
_null_cache,
_token_cache,
get_integration_env_vars,
@@ -49,9 +51,13 @@ def clear_caches():
"""Ensure clean caches before and after every test."""
_token_cache.clear()
_null_cache.clear()
_gh_identity_cache.clear()
_gh_identity_null_cache.clear()
yield
_token_cache.clear()
_null_cache.clear()
_gh_identity_cache.clear()
_gh_identity_null_cache.clear()
class TestInvalidateUserProviderCache:
@@ -77,6 +83,34 @@ class TestInvalidateUserProviderCache:
invalidate_user_provider_cache(_USER, _PROVIDER)
assert other_key in _token_cache
def test_clears_gh_identity_cache_for_github_provider(self):
"""When provider is 'github', identity caches must also be cleared."""
_gh_identity_cache[_USER] = {
"GIT_AUTHOR_NAME": "Old Name",
"GIT_AUTHOR_EMAIL": "old@example.com",
"GIT_COMMITTER_NAME": "Old Name",
"GIT_COMMITTER_EMAIL": "old@example.com",
}
invalidate_user_provider_cache(_USER, "github")
assert _USER not in _gh_identity_cache
def test_clears_gh_identity_null_cache_for_github_provider(self):
"""When provider is 'github', the identity null-cache must also be cleared."""
_gh_identity_null_cache[_USER] = True
invalidate_user_provider_cache(_USER, "github")
assert _USER not in _gh_identity_null_cache
def test_does_not_clear_gh_identity_cache_for_other_providers(self):
"""When provider is NOT 'github', identity caches must be left alone."""
_gh_identity_cache[_USER] = {
"GIT_AUTHOR_NAME": "Some Name",
"GIT_AUTHOR_EMAIL": "some@example.com",
"GIT_COMMITTER_NAME": "Some Name",
"GIT_COMMITTER_EMAIL": "some@example.com",
}
invalidate_user_provider_cache(_USER, "some-other-provider")
assert _USER in _gh_identity_cache
class TestGetProviderToken:
@pytest.mark.asyncio(loop_scope="session")

View File

@@ -22,7 +22,10 @@ from e2b import AsyncSandbox
from e2b.exceptions import TimeoutException
from backend.copilot.context import E2B_WORKDIR, get_current_sandbox
from backend.copilot.integration_creds import get_integration_env_vars
from backend.copilot.integration_creds import (
get_github_user_git_identity,
get_integration_env_vars,
)
from backend.copilot.model import ChatSession
from .base import BaseTool
@@ -159,6 +162,12 @@ class BashExecTool(BaseTool):
secret_values = [v for v in integration_env.values() if v]
envs.update(integration_env)
# Set git author/committer identity from the user's GitHub profile
# so commits made in the sandbox are attributed correctly.
git_identity = await get_github_user_git_identity(user_id)
if git_identity:
envs.update(git_identity)
try:
result = await sandbox.commands.run(
f"bash -c {shlex.quote(command)}",

View File

@@ -38,7 +38,10 @@ class TestBashExecE2BTokenInjection:
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value=env_vars),
) as mock_get_env:
) as mock_get_env, patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
):
result = await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
@@ -53,6 +56,66 @@ class TestBashExecE2BTokenInjection:
assert call_kwargs["envs"]["GITHUB_TOKEN"] == "gh-secret"
assert isinstance(result, BashExecResponse)
@pytest.mark.asyncio(loop_scope="session")
async def test_git_identity_set_from_github_profile(self):
"""When user has a connected GitHub account, git env vars are set from their profile."""
tool = _make_tool()
session = make_session(user_id=_USER)
sandbox = _make_sandbox(stdout="ok")
identity = {
"GIT_AUTHOR_NAME": "Test User",
"GIT_AUTHOR_EMAIL": "test@example.com",
"GIT_COMMITTER_NAME": "Test User",
"GIT_COMMITTER_EMAIL": "test@example.com",
}
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={}),
), patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=identity),
):
await tool._execute_on_e2b(
sandbox=sandbox,
command="git commit -m test",
timeout=10,
session_id=session.session_id,
user_id=_USER,
)
call_kwargs = sandbox.commands.run.call_args[1]
assert call_kwargs["envs"]["GIT_AUTHOR_NAME"] == "Test User"
assert call_kwargs["envs"]["GIT_AUTHOR_EMAIL"] == "test@example.com"
assert call_kwargs["envs"]["GIT_COMMITTER_NAME"] == "Test User"
assert call_kwargs["envs"]["GIT_COMMITTER_EMAIL"] == "test@example.com"
@pytest.mark.asyncio(loop_scope="session")
async def test_no_git_identity_when_github_not_connected(self):
"""When user has no GitHub account, git identity env vars are absent."""
tool = _make_tool()
session = make_session(user_id=_USER)
sandbox = _make_sandbox(stdout="ok")
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={}),
), patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
):
await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
timeout=10,
session_id=session.session_id,
user_id=_USER,
)
call_kwargs = sandbox.commands.run.call_args[1]
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
assert "GIT_COMMITTER_EMAIL" not in call_kwargs["envs"]
@pytest.mark.asyncio(loop_scope="session")
async def test_no_token_injection_when_user_id_is_none(self):
"""When user_id is None, get_integration_env_vars must NOT be called."""
@@ -63,7 +126,10 @@ class TestBashExecE2BTokenInjection:
with patch(
"backend.copilot.tools.bash_exec.get_integration_env_vars",
new=AsyncMock(return_value={"GH_TOKEN": "should-not-appear"}),
) as mock_get_env:
) as mock_get_env, patch(
"backend.copilot.tools.bash_exec.get_github_user_git_identity",
new=AsyncMock(return_value=None),
) as mock_get_identity:
result = await tool._execute_on_e2b(
sandbox=sandbox,
command="echo hi",
@@ -73,6 +139,8 @@ class TestBashExecE2BTokenInjection:
)
mock_get_env.assert_not_called()
mock_get_identity.assert_not_called()
call_kwargs = sandbox.commands.run.call_args[1]
assert "GH_TOKEN" not in call_kwargs["envs"]
assert "GIT_AUTHOR_NAME" not in call_kwargs["envs"]
assert isinstance(result, BashExecResponse)