Files
AutoGPT/autogpt_platform/backend/backend/copilot/rate_limit.py
Zamil Majdy 28b26dde94 feat(platform): spend credits to reset CoPilot daily rate limit (#12526)
## Summary
- When users hit their daily CoPilot token limit, they can now spend
credits ($2.00 default) to reset it and continue working
- Adds a dialog prompt when rate limit error occurs, offering the
credit-based reset option
- Adds a "Reset daily limit" button in the usage limits panel when the
daily limit is reached
- Backend: new `POST /api/chat/usage/reset` endpoint,
`reset_daily_usage()` Redis helper, `rate_limit_reset_cost` config
- Frontend: `RateLimitResetDialog` component, updated
`UsagePanelContent` with reset button, `useCopilotStream` exposes rate
limit state
- **NEW: Resetting the daily limit also reduces weekly usage by the
daily limit amount**, effectively granting 1 extra day's worth of weekly
capacity (e.g., daily_limit=10000 → weekly usage reduced by 10000,
clamped to 0)

## Context
Users have been confused about having credits available but being
blocked by rate limits (REQ-63, REQ-61). This provides a short-term
solution allowing users to spend credits to bypass their daily limit.

The weekly usage reduction ensures that a paid daily reset doesn't just
move the bottleneck to the weekly limit — users get genuine additional
capacity for the day they paid to unlock.

### Checklist 📋

#### For code changes:
- [x] I have clearly listed my changes in the PR description
- [x] I have made a test plan
- [x] I have tested my changes according to the test plan:
  - [x] Hit daily rate limit → dialog appears with reset option
- [x] Click "Reset for $2.00" → credits charged, daily counter reset,
dialog closes
- [x] Usage panel shows "Reset daily limit" button when at 100% daily
usage
- [x] When `rate_limit_reset_cost=0` (disabled), rate limit shows toast
instead of dialog
  - [x] Insufficient credits → error toast shown
  - [x] Verify existing rate limit tests pass
  - [x] Unit tests: weekly counter reduced by daily_limit on reset
  - [x] Unit tests: weekly counter clamped to 0 when usage < daily_limit
  - [x] Unit tests: no weekly reduction when daily_token_limit=0

#### For configuration changes:
- [x] `.env.default` is updated or already compatible with my changes
(new config fields `rate_limit_reset_cost` and `max_daily_resets` have
defaults in code)
- [x] `docker-compose.yml` is updated or already compatible with my
changes (no Docker changes needed)
2026-03-26 13:52:08 +00:00

439 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CoPilot rate limiting based on token usage.
Uses Redis fixed-window counters to track per-user token consumption
with configurable daily and weekly limits. Daily windows reset at
midnight UTC; weekly windows reset at ISO week boundary (Monday 00:00
UTC). Fails open when Redis is unavailable to avoid blocking users.
"""
import asyncio
import logging
from datetime import UTC, datetime, timedelta
from pydantic import BaseModel, Field
from redis.exceptions import RedisError
from backend.data.redis_client import get_redis_async
logger = logging.getLogger(__name__)
# Redis key prefixes
_USAGE_KEY_PREFIX = "copilot:usage"
class UsageWindow(BaseModel):
"""Usage within a single time window."""
used: int
limit: int = Field(
description="Maximum tokens allowed in this window. 0 means unlimited."
)
resets_at: datetime
class CoPilotUsageStatus(BaseModel):
"""Current usage status for a user across all windows."""
daily: UsageWindow
weekly: UsageWindow
reset_cost: int = Field(
default=0,
description="Credit cost (in cents) to reset the daily limit. 0 = feature disabled.",
)
class RateLimitExceeded(Exception):
"""Raised when a user exceeds their CoPilot usage limit."""
def __init__(self, window: str, resets_at: datetime):
self.window = window
self.resets_at = resets_at
delta = resets_at - datetime.now(UTC)
total_secs = delta.total_seconds()
if total_secs <= 0:
time_str = "now"
else:
hours = int(total_secs // 3600)
minutes = int((total_secs % 3600) // 60)
time_str = f"{hours}h {minutes}m" if hours > 0 else f"{minutes}m"
super().__init__(
f"You've reached your {window} usage limit. Resets in {time_str}."
)
async def get_usage_status(
user_id: str,
daily_token_limit: int,
weekly_token_limit: int,
rate_limit_reset_cost: int = 0,
) -> CoPilotUsageStatus:
"""Get current usage status for a user.
Args:
user_id: The user's ID.
daily_token_limit: Max tokens per day (0 = unlimited).
weekly_token_limit: Max tokens per week (0 = unlimited).
rate_limit_reset_cost: Credit cost (cents) to reset daily limit (0 = disabled).
Returns:
CoPilotUsageStatus with current usage and limits.
"""
now = datetime.now(UTC)
daily_used = 0
weekly_used = 0
try:
redis = await get_redis_async()
daily_raw, weekly_raw = await asyncio.gather(
redis.get(_daily_key(user_id, now=now)),
redis.get(_weekly_key(user_id, now=now)),
)
daily_used = int(daily_raw or 0)
weekly_used = int(weekly_raw or 0)
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for usage status, returning zeros")
return CoPilotUsageStatus(
daily=UsageWindow(
used=daily_used,
limit=daily_token_limit,
resets_at=_daily_reset_time(now=now),
),
weekly=UsageWindow(
used=weekly_used,
limit=weekly_token_limit,
resets_at=_weekly_reset_time(now=now),
),
reset_cost=rate_limit_reset_cost,
)
async def check_rate_limit(
user_id: str,
daily_token_limit: int,
weekly_token_limit: int,
) -> None:
"""Check if user is within rate limits. Raises RateLimitExceeded if not.
This is a pre-turn soft check. The authoritative usage counter is updated
by ``record_token_usage()`` after the turn completes. Under concurrency,
two parallel turns may both pass this check against the same snapshot.
This is acceptable because token-based limits are approximate by nature
(the exact token count is unknown until after generation).
Fails open: if Redis is unavailable, allows the request.
"""
# Short-circuit: when both limits are 0 (unlimited) skip the Redis
# round-trip entirely.
if daily_token_limit <= 0 and weekly_token_limit <= 0:
return
now = datetime.now(UTC)
try:
redis = await get_redis_async()
daily_raw, weekly_raw = await asyncio.gather(
redis.get(_daily_key(user_id, now=now)),
redis.get(_weekly_key(user_id, now=now)),
)
daily_used = int(daily_raw or 0)
weekly_used = int(weekly_raw or 0)
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for rate limit check, allowing request")
return
# Worst-case overshoot: N concurrent requests × ~15K tokens each.
if daily_token_limit > 0 and daily_used >= daily_token_limit:
raise RateLimitExceeded("daily", _daily_reset_time(now=now))
if weekly_token_limit > 0 and weekly_used >= weekly_token_limit:
raise RateLimitExceeded("weekly", _weekly_reset_time(now=now))
async def reset_daily_usage(user_id: str, daily_token_limit: int = 0) -> bool:
"""Reset a user's daily token usage counter in Redis.
Called after a user pays credits to extend their daily limit.
Also reduces the weekly usage counter by ``daily_token_limit`` tokens
(clamped to 0) so the user effectively gets one extra day's worth of
weekly capacity.
Args:
user_id: The user's ID.
daily_token_limit: The configured daily token limit. When positive,
the weekly counter is reduced by this amount.
Fails open: returns False if Redis is unavailable (consistent with
the fail-open design of this module).
"""
now = datetime.now(UTC)
try:
redis = await get_redis_async()
# Use a MULTI/EXEC transaction so that DELETE (daily) and DECRBY
# (weekly) either both execute or neither does. This prevents the
# scenario where the daily counter is cleared but the weekly
# counter is not decremented — which would let the caller refund
# credits even though the daily limit was already reset.
d_key = _daily_key(user_id, now=now)
w_key = _weekly_key(user_id, now=now) if daily_token_limit > 0 else None
pipe = redis.pipeline(transaction=True)
pipe.delete(d_key)
if w_key is not None:
pipe.decrby(w_key, daily_token_limit)
results = await pipe.execute()
# Clamp negative weekly counter to 0 (best-effort; not critical).
if w_key is not None:
new_val = results[1] # DECRBY result
if new_val < 0:
await redis.set(w_key, 0, keepttl=True)
logger.info("Reset daily usage for user %s", user_id[:8])
return True
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for resetting daily usage")
return False
_RESET_LOCK_PREFIX = "copilot:reset_lock"
_RESET_COUNT_PREFIX = "copilot:reset_count"
async def acquire_reset_lock(user_id: str, ttl_seconds: int = 10) -> bool:
"""Acquire a short-lived lock to serialize rate limit resets per user."""
try:
redis = await get_redis_async()
key = f"{_RESET_LOCK_PREFIX}:{user_id}"
return bool(await redis.set(key, "1", nx=True, ex=ttl_seconds))
except (RedisError, ConnectionError, OSError) as exc:
logger.warning("Redis unavailable for reset lock, rejecting reset: %s", exc)
return False
async def release_reset_lock(user_id: str) -> None:
"""Release the per-user reset lock."""
try:
redis = await get_redis_async()
await redis.delete(f"{_RESET_LOCK_PREFIX}:{user_id}")
except (RedisError, ConnectionError, OSError):
pass # Lock will expire via TTL
async def get_daily_reset_count(user_id: str) -> int | None:
"""Get how many times the user has reset today.
Returns None when Redis is unavailable so callers can fail-closed
for billed operations (as opposed to failing open for read-only
rate-limit checks).
"""
now = datetime.now(UTC)
try:
redis = await get_redis_async()
key = f"{_RESET_COUNT_PREFIX}:{user_id}:{now.strftime('%Y-%m-%d')}"
val = await redis.get(key)
return int(val or 0)
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for reading daily reset count")
return None
async def increment_daily_reset_count(user_id: str) -> None:
"""Increment and track how many resets this user has done today."""
now = datetime.now(UTC)
try:
redis = await get_redis_async()
key = f"{_RESET_COUNT_PREFIX}:{user_id}:{now.strftime('%Y-%m-%d')}"
pipe = redis.pipeline(transaction=True)
pipe.incr(key)
seconds_until_reset = int((_daily_reset_time(now=now) - now).total_seconds())
pipe.expire(key, max(seconds_until_reset, 1))
await pipe.execute()
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for tracking reset count")
async def record_token_usage(
user_id: str,
prompt_tokens: int,
completion_tokens: int,
*,
cache_read_tokens: int = 0,
cache_creation_tokens: int = 0,
) -> None:
"""Record token usage for a user across all windows.
Uses cost-weighted counting so cached tokens don't unfairly penalise
multi-turn conversations. Anthropic's pricing:
- uncached input: 100%
- cache creation: 25%
- cache read: 10%
- output: 100%
``prompt_tokens`` should be the *uncached* input count (``input_tokens``
from the API response). Cache counts are passed separately.
Args:
user_id: The user's ID.
prompt_tokens: Uncached input tokens.
completion_tokens: Output tokens.
cache_read_tokens: Tokens served from prompt cache (10% cost).
cache_creation_tokens: Tokens written to prompt cache (25% cost).
"""
prompt_tokens = max(0, prompt_tokens)
completion_tokens = max(0, completion_tokens)
cache_read_tokens = max(0, cache_read_tokens)
cache_creation_tokens = max(0, cache_creation_tokens)
weighted_input = (
prompt_tokens
+ round(cache_creation_tokens * 0.25)
+ round(cache_read_tokens * 0.1)
)
total = weighted_input + completion_tokens
if total <= 0:
return
raw_total = (
prompt_tokens + cache_read_tokens + cache_creation_tokens + completion_tokens
)
logger.info(
"Recording token usage for %s: raw=%d, weighted=%d "
"(uncached=%d, cache_read=%d@10%%, cache_create=%d@25%%, output=%d)",
user_id[:8],
raw_total,
total,
prompt_tokens,
cache_read_tokens,
cache_creation_tokens,
completion_tokens,
)
now = datetime.now(UTC)
try:
redis = await get_redis_async()
# transaction=False: these are independent INCRBY+EXPIRE pairs on
# separate keys — no cross-key atomicity needed. Skipping
# MULTI/EXEC avoids the overhead. If the connection drops between
# INCRBY and EXPIRE the key survives until the next date-based key
# rotation (daily/weekly), so the memory-leak risk is negligible.
pipe = redis.pipeline(transaction=False)
# Daily counter (expires at next midnight UTC)
d_key = _daily_key(user_id, now=now)
pipe.incrby(d_key, total)
seconds_until_daily_reset = int(
(_daily_reset_time(now=now) - now).total_seconds()
)
pipe.expire(d_key, max(seconds_until_daily_reset, 1))
# Weekly counter (expires end of week)
w_key = _weekly_key(user_id, now=now)
pipe.incrby(w_key, total)
seconds_until_weekly_reset = int(
(_weekly_reset_time(now=now) - now).total_seconds()
)
pipe.expire(w_key, max(seconds_until_weekly_reset, 1))
await pipe.execute()
except (RedisError, ConnectionError, OSError):
logger.warning(
"Redis unavailable for recording token usage (tokens=%d)",
total,
)
async def get_global_rate_limits(
user_id: str,
config_daily: int,
config_weekly: int,
) -> tuple[int, int]:
"""Resolve global rate limits from LaunchDarkly, falling back to config.
Args:
user_id: User ID for LD flag evaluation context.
config_daily: Fallback daily limit from ChatConfig.
config_weekly: Fallback weekly limit from ChatConfig.
Returns:
(daily_token_limit, weekly_token_limit) tuple.
"""
# Lazy import to avoid circular dependency:
# rate_limit -> feature_flag -> settings -> ... -> rate_limit
from backend.util.feature_flag import Flag, get_feature_flag_value
daily_raw = await get_feature_flag_value(
Flag.COPILOT_DAILY_TOKEN_LIMIT.value, user_id, config_daily
)
weekly_raw = await get_feature_flag_value(
Flag.COPILOT_WEEKLY_TOKEN_LIMIT.value, user_id, config_weekly
)
try:
daily = max(0, int(daily_raw))
except (TypeError, ValueError):
logger.warning("Invalid LD value for daily token limit: %r", daily_raw)
daily = config_daily
try:
weekly = max(0, int(weekly_raw))
except (TypeError, ValueError):
logger.warning("Invalid LD value for weekly token limit: %r", weekly_raw)
weekly = config_weekly
return daily, weekly
async def reset_user_usage(user_id: str, *, reset_weekly: bool = False) -> None:
"""Reset a user's usage counters.
Always deletes the daily Redis key. When *reset_weekly* is ``True``,
the weekly key is deleted as well.
Unlike read paths (``get_usage_status``, ``check_rate_limit``) which
fail-open on Redis errors, resets intentionally re-raise so the caller
knows the operation did not succeed. A silent failure here would leave
the admin believing the counters were zeroed when they were not.
"""
now = datetime.now(UTC)
keys_to_delete = [_daily_key(user_id, now=now)]
if reset_weekly:
keys_to_delete.append(_weekly_key(user_id, now=now))
try:
redis = await get_redis_async()
await redis.delete(*keys_to_delete)
except (RedisError, ConnectionError, OSError):
logger.warning("Redis unavailable for resetting user usage")
raise
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
def _daily_key(user_id: str, now: datetime | None = None) -> str:
if now is None:
now = datetime.now(UTC)
return f"{_USAGE_KEY_PREFIX}:daily:{user_id}:{now.strftime('%Y-%m-%d')}"
def _weekly_key(user_id: str, now: datetime | None = None) -> str:
if now is None:
now = datetime.now(UTC)
year, week, _ = now.isocalendar()
return f"{_USAGE_KEY_PREFIX}:weekly:{user_id}:{year}-W{week:02d}"
def _daily_reset_time(now: datetime | None = None) -> datetime:
"""Calculate when the current daily window resets (next midnight UTC)."""
if now is None:
now = datetime.now(UTC)
return now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1)
def _weekly_reset_time(now: datetime | None = None) -> datetime:
"""Calculate when the current weekly window resets (next Monday 00:00 UTC)."""
if now is None:
now = datetime.now(UTC)
days_until_monday = (7 - now.weekday()) % 7 or 7
return now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(
days=days_until_monday
)