Add formal ratelimiting headers

Three headers can now be included for API requests:

X-Ratelimit-Used: Number of requests used in this period
X-Ratelimit-Remaining: Number of requests left to use
X-Ratelimit-Reset: Approximate number of seconds to end of period

Additionally, 429 responses can be returned for requests that
exceed the ratelimit.

Rate limits are applied per IP address by default; OAuth clients
are limited per user-client combination instead.
This commit is contained in:
Keith Mitchell
2014-02-18 17:35:55 -08:00
parent c918fdc587
commit 084f5736ce
6 changed files with 147 additions and 16 deletions

View File

@@ -204,12 +204,7 @@ embedly_api_key =
############################################ QUOTAS
# rate limiter duration (minutes)
RATELIMIT = 10
# user agent substrings to hard-ratelimit to a number of requests per ten second period
# example: agents = googlebot:10, appengine:2
agents =
# ratelimits for various types of relations creatable in subreddits
# quota for various types of relations creatable in subreddits
sr_banned_quota = 10000
sr_moderator_invite_quota = 10000
sr_contributor_quota = 10000
@@ -223,14 +218,39 @@ new_link_share_delay = 30 seconds
max_sr_images = 50
############################################ RATELIMITS
# If true, send 429 responses on exceeded ratelimits
# If false, send headers only, but don't abort
# Only applies if tracking is enabled below
ENFORCE_RATELIMIT = false
# If true, store per-user request counts in ratelimits cache
RL_SITEWIDE_ENABLED = true
# How large of a burst window will users be allowed?
RL_RESET_MINUTES = 10
# What is the average request rate over the above time period?
RL_AVG_REQ_PER_SEC = 0.5
# Same as above, but configured separately for connections via OAuth
RL_OAUTH_SITEWIDE_ENABLED = true
RL_OAUTH_RESET_MINUTES = 10
RL_OAUTH_AVG_REQ_PER_SEC = 0.5
# user agent substrings to hard-ratelimit to a number of requests per ten second period
# example: agents = googlebot:10, appengine:2
agents =
# karma needed to avoid per-subreddit submission ratelimits
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
############################################ THRESHOLDS
# minimum item score to be considered for quota baskets
QUOTA_THRESHOLD = 5
# if the user has positive total karma, their per-subreddit karma will default to this, else 0
MIN_UP_KARMA = 1
# karma needed to avoid per-subreddit submission ratelimits
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
# ages in days at which various actions are disallowed to preserve history
REPLY_AGE_LIMIT = 180
VOTE_AGE_LIMIT = 180
@@ -295,6 +315,8 @@ permacache_memcaches = 127.0.0.1:11211
srmembercaches = 127.0.0.1:11211
# a local cache that's not globally consistent and can have stale data (optional)
stalecaches =
# cache for tracking rate limit thresholds
ratelimitcaches = 127.0.0.1:11211
############################################ MISCELLANEOUS

View File

@@ -30,6 +30,7 @@ class APIv1Controller(OAuth2ResourceController):
def pre(self):
# Per-request setup for the v1 API controller.
OAuth2ResourceController.pre(self)
# Bearer-token auth must run before ratelimiting so that OAuth-specific
# sitewide limits (per user-client combo) can be selected.
self.check_for_bearer_token()
self.run_sitewide_ratelimits()
def try_pagecache(self):
# API responses are never served from the pagecache.
pass

View File

@@ -570,15 +570,20 @@ def set_colors():
c.bordercolor = request.GET.get('bordercolor')
def _get_ratelimit_timeslice(slice_seconds):
slice_start, secs_since = divmod(time.time(), slice_seconds)
slice_start = time.gmtime(int(slice_start * slice_seconds))
secs_to_next = slice_seconds - int(secs_since)
return slice_start, secs_to_next
def ratelimit_agent(agent, limit=10, slice_size=10):
    """Hard-ratelimit a user agent substring to `limit` requests per window.

    The flattened diff left both the pre- and post-change statements in
    place: the old inline divmod/gmtime slice computation and the old
    `retry_after` assignment were superseded by `_get_ratelimit_timeslice`
    but not removed, leaving dead duplicate assignments. Only the new
    versions are kept here.

    Aborts the request with HTTP 429 (setting `retry_after` in the WSGI
    environ) once the per-window counter for this agent exceeds `limit`.
    """
    # Windows longer than a minute would collide, since the cache key only
    # encodes the seconds field of the window start.
    slice_size = min(slice_size, 60)
    time_slice, retry_after = _get_ratelimit_timeslice(slice_size)
    key = "rate_agent_" + agent + time.strftime("_%S", time_slice)
    # add() is a no-op if the counter already exists; expire just past the
    # end of the window so stale counters clean themselves up.
    g.cache.add(key, 0, time=slice_size + 1)
    if g.cache.incr(key) > limit:
        request.environ['retry_after'] = retry_after
        abort(429)
appengine_re = re.compile(r'AppEngine-Google; \(\+http://code.google.com/appengine; appid: (?:dev|s)~([a-z0-9-]{6,30})\)\Z')
@@ -733,6 +738,7 @@ def abort_with_error(error):
class MinimalController(BaseController):
allow_stylesheets = False
defer_ratelimiting = False
def request_key(self):
# note that this references the cookie at request time, not
@@ -758,6 +764,65 @@ class MinimalController(BaseController):
def cached_response(self):
# NOTE(review): returns an empty body — presumably the payload served when
# a pagecache hit short-circuits the request; confirm against callers.
return ""
def run_sitewide_ratelimits(self):
"""Ratelimit users and add ratelimit headers to the response.
Headers added are:
X-Ratelimit-Used: Number of requests used in this period
X-Ratelimit-Remaining: Number of requests left to use
X-Ratelimit-Reset: Approximate number of seconds to end of period
This function only has an effect if one of
g.RL_SITEWIDE_ENABLED or g.RL_OAUTH_SITEWIDE_ENABLED
are set to 'true' in the app configuration
If the ratelimit is exceeded, a 429 response will be sent,
unless the app configuration has g.ENFORCE_RATELIMIT off.
Headers will be sent even on aborted requests.
"""
if c.cdn_cacheable or not is_api():
# No ratelimiting or headers for:
# * Web requests (HTML)
# * CDN requests (logged out via www.reddit.com)
return
elif c.oauth_user and g.RL_OAUTH_SITEWIDE_ENABLED:
max_reqs = g.RL_OAUTH_MAX_REQS
period = g.RL_OAUTH_RESET_SECONDS
# Convert client_id to ascii str for use as memcache key
client_id = c.oauth2_access_token.client_id.encode("ascii")
# OAuth2 ratelimits are per user-app combination
key = 'siterl-oauth-' + c.user._id36 + ":" + client_id
elif g.RL_SITEWIDE_ENABLED:
max_reqs = g.RL_MAX_REQS
period = g.RL_RESET_SECONDS
# API (non-oauth) limits are per-ip
key = 'siterl-api-' + request.ip
else:
# Not in a context where sitewide ratelimits are on
return
period_start, retry_after = _get_ratelimit_timeslice(period)
# Suffix the key with the window start so counts reset each period.
key += time.strftime("-%H%M%S", period_start)
# add() is a no-op when the counter exists; expire just past window end.
g.ratelimitcache.add(key, 0, time=retry_after + 1)
# Increment the key to track the current request
recent_reqs = g.ratelimitcache.incr(key)
# Clamp at zero: Used may exceed max when enforcement is disabled.
reqs_remaining = max(0, max_reqs - recent_reqs)
c.ratelimit_headers = {
"X-Ratelimit-Used": str(recent_reqs),
"X-Ratelimit-Reset": str(retry_after),
"X-Ratelimit-Remaining": str(reqs_remaining),
}
if reqs_remaining <= 0 and g.ENFORCE_RATELIMIT:
# For non-abort situations, the headers will be added in post(),
# to avoid including them in a pagecache
response.headers.update(c.ratelimit_headers)
abort(429)
def pre(self):
action = request.environ["pylons.routes_dict"].get("action")
if action:
@@ -785,6 +850,9 @@ class MinimalController(BaseController):
c.allow_loggedin_cache = False
c.allow_framing = False
c.cdn_cacheable = (request.via_cdn and
g.login_cookie not in request.cookies)
# the domain has to be set before Cookies get initialized
set_subreddit()
c.errors = ErrorSet()
@@ -799,6 +867,10 @@ class MinimalController(BaseController):
g.stats.count_string('user_agents', request.user_agent)
if not self.defer_ratelimiting:
self.run_sitewide_ratelimits()
c.request_timer.intermediate("minimal-ratelimits")
hooks.get_hook("reddit.request.minimal_begin").call()
def can_use_pagecache(self):
@@ -891,6 +963,9 @@ class MinimalController(BaseController):
pagecache_state = "disallowed"
response.headers["X-Reddit-Pagecache"] = pagecache_state
if c.ratelimit_headers:
response.headers.update(c.ratelimit_headers)
# send cookies
for k, v in c.cookies.iteritems():
if v.dirty:
@@ -979,6 +1054,8 @@ class MinimalController(BaseController):
class OAuth2ResourceController(MinimalController):
defer_ratelimiting = True
def authenticate_with_token(self):
set_extension(request.environ, "json")
set_content_type()
@@ -1150,6 +1227,9 @@ class RedditController(OAuth2ResourceController):
c.request_timer.intermediate("base-auth")
self.run_sitewide_ratelimits()
c.request_timer.intermediate("base-ratelimits")
c.over18 = over18()
set_obey_over18()

View File

@@ -125,7 +125,6 @@ class Globals(object):
'REPLY_AGE_LIMIT',
'REPORT_AGE_LIMIT',
'HOT_PAGE_AGE',
'RATELIMIT',
'QUOTA_THRESHOLD',
'ADMIN_COOKIE_TTL',
'ADMIN_COOKIE_MAX_IDLE',
@@ -154,6 +153,8 @@ class Globals(object):
'wiki_max_page_separators',
'min_promote_future',
'max_promote_future',
'RL_RESET_MINUTES',
'RL_OAUTH_RESET_MINUTES',
],
ConfigValue.float: [
@@ -161,6 +162,8 @@ class Globals(object):
'max_promote_bid',
'statsd_sample_rate',
'querycache_prune_chance',
'RL_AVG_REQ_PER_SEC',
'RL_OAUTH_AVG_REQ_PER_SEC',
],
ConfigValue.bool: [
@@ -187,6 +190,9 @@ class Globals(object):
'shard_link_vote_queues',
'shard_commentstree_queues',
'subreddit_stylesheets_static',
'ENFORCE_RATELIMIT',
'RL_SITEWIDE_ENABLED',
'RL_OAUTH_SITEWIDE_ENABLED',
],
ConfigValue.tuple: [
@@ -199,6 +205,7 @@ class Globals(object):
'pagecaches',
'memoizecaches',
'srmembercaches',
'ratelimitcaches',
'cassandra_seeds',
'admins',
'sponsors',
@@ -443,6 +450,15 @@ class Globals(object):
locale.setlocale(locale.LC_ALL, self.locale)
# Pre-calculate ratelimit values
self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
self.RL_RESET_SECONDS)
self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
self.RL_OAUTH_RESET_SECONDS)
self.startup_timer.intermediate("configuration")
################# ZOOKEEPER
@@ -497,6 +513,12 @@ class Globals(object):
num_clients=num_mc_clients,
)
ratelimitcaches = CMemcache(
self.ratelimitcaches,
min_compress_len=96,
num_clients=num_mc_clients,
)
# a smaller pool of caches used only for distributed locks.
# TODO: move this to ZooKeeper
self.lock_cache = CMemcache(self.lockcaches,
@@ -615,6 +637,10 @@ class Globals(object):
(localcache_cls(), srmembercaches))
cache_chains.update(srmembercache=self.srmembercache)
self.ratelimitcache = MemcacheChain(
(localcache_cls(), ratelimitcaches))
cache_chains.update(ratelimitcaches=self.ratelimitcache)
self.rendercache = MemcacheChain((
localcache_cls(),
rendercaches,

View File

@@ -90,12 +90,14 @@ class BaseController(WSGIController):
forwarded_for = environ.get('HTTP_X_FORWARDED_FOR', ())
remote_addr = environ.get('REMOTE_ADDR')
request.via_cdn = False
if (g.secrets["true_ip"]
and true_client_ip
and ip_hash
and hashlib.md5(true_client_ip + g.secrets["true_ip"]).hexdigest() \
== ip_hash.lower()):
request.ip = true_client_ip
request.via_cdn = True
elif g.trust_local_proxies and forwarded_for and is_local_address(remote_addr):
request.ip = forwarded_for.split(',')[-1]
else:

View File

@@ -1605,7 +1605,7 @@ class VRatelimit(Validator):
seconds = None):
to_set = {}
if seconds is None:
seconds = g.RATELIMIT*60
seconds = g.RL_RESET_SECONDS
expire_time = datetime.now(g.tz) + timedelta(seconds = seconds)
if rate_user and c.user_is_loggedin:
to_set['user' + str(c.user._id36)] = expire_time
@@ -1632,7 +1632,7 @@ class VDelay(Validator):
@classmethod
def record_violation(self, category, seconds = None, growfast=False):
if seconds is None:
seconds = g.RATELIMIT*60
seconds = g.RL_RESET_SECONDS
key = "VDelay-%s-%s" % (category, request.ip)
prev_violations = g.memcache.get(key)