Add formal ratelimiting headers

Three headers can now be included for API requests:

X-Ratelimit-Used: Number of requests used in this period
X-Ratelimit-Remaining: Number of requests left to use
X-Ratelimit-Reset: Approximate number of seconds to end of period

Additionally, 429 responses can be returned for requests that
exceed the ratelimit.

Rate limits are applied per IP address by default; OAuth clients
are limited per user-client combination instead.
This commit is contained in:
Keith Mitchell
2014-02-18 17:35:55 -08:00
parent c918fdc587
commit 084f5736ce
6 changed files with 147 additions and 16 deletions

View File

@@ -204,12 +204,7 @@ embedly_api_key =
############################################ QUOTAS
# rate limiter duration (minutes)
RATELIMIT = 10
# user agent substrings to hard-ratelimit to a number of requests per ten second period
# example: agents = googlebot:10, appengine:2
agents =
# ratelimits for various types of relations creatable in subreddits
# quota for various types of relations creatable in subreddits
sr_banned_quota = 10000
sr_moderator_invite_quota = 10000
sr_contributor_quota = 10000
@@ -223,14 +218,39 @@ new_link_share_delay = 30 seconds
max_sr_images = 50
############################################ RATELIMITS
# If true, send 429 responses on exceeded ratelimits
# If false, send headers only, but don't abort
# Only applies if tracking is enabled below
ENFORCE_RATELIMIT = false
# If true, store per-user request counts in ratelimits cache
RL_SITEWIDE_ENABLED = true
# How large of a burst window will users be allowed?
RL_RESET_MINUTES = 10
# What is the average request rate over the above time period?
RL_AVG_REQ_PER_SEC = 0.5
# Same as above, but configured separately for connections via OAuth
RL_OAUTH_SITEWIDE_ENABLED = true
RL_OAUTH_RESET_MINUTES = 10
RL_OAUTH_AVG_REQ_PER_SEC = 0.5
# user agent substrings to hard-ratelimit to a number of requests per ten second period
# example: agents = googlebot:10, appengine:2
agents =
# karma needed to avoid per-subreddit submission ratelimits
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
############################################ THRESHOLDS
# minimum item score to be considered for quota baskets
QUOTA_THRESHOLD = 5
# if the user has positive total karma, their per-subreddit karma will default to this, else 0
MIN_UP_KARMA = 1
# karma needed to avoid per-subreddit submission ratelimits
MIN_RATE_LIMIT_KARMA = 10
MIN_RATE_LIMIT_COMMENT_KARMA = 1
# ages in days at which various actions are disallowed to preserve history
REPLY_AGE_LIMIT = 180
VOTE_AGE_LIMIT = 180
@@ -295,6 +315,8 @@ permacache_memcaches = 127.0.0.1:11211
srmembercaches = 127.0.0.1:11211
# a local cache that's not globally consistent and can have stale data (optional)
stalecaches =
# cache for tracking rate limit thresholds
ratelimitcaches = 127.0.0.1:11211
############################################ MISCELLANEOUS

View File

@@ -30,6 +30,7 @@ class APIv1Controller(OAuth2ResourceController):
def pre(self):
# Per-request setup for the v1 API controller.
OAuth2ResourceController.pre(self)
# Bearer-token auth must run before ratelimiting so that OAuth-specific
# sitewide limits (per user-client combo) can be selected.
self.check_for_bearer_token()
self.run_sitewide_ratelimits()
def try_pagecache(self):
# API responses are never served from the pagecache.
pass

View File

@@ -570,15 +570,20 @@ def set_colors():
c.bordercolor = request.GET.get('bordercolor')
def _get_ratelimit_timeslice(slice_seconds):
slice_start, secs_since = divmod(time.time(), slice_seconds)
slice_start = time.gmtime(int(slice_start * slice_seconds))
secs_to_next = slice_seconds - int(secs_since)
return slice_start, secs_to_next
def ratelimit_agent(agent, limit=10, slice_size=10):
    """Hard-ratelimit a user agent substring to `limit` requests per window.

    The flattened diff left both the pre- and post-change statements in
    place: the old inline divmod/gmtime slice computation and the old
    `retry_after` assignment were superseded by `_get_ratelimit_timeslice`
    but not removed, leaving dead duplicate assignments. Only the new
    versions are kept here.

    Aborts the request with HTTP 429 (setting `retry_after` in the WSGI
    environ) once the per-window counter for this agent exceeds `limit`.
    """
    # Windows longer than a minute would collide, since the cache key only
    # encodes the seconds field of the window start.
    slice_size = min(slice_size, 60)
    time_slice, retry_after = _get_ratelimit_timeslice(slice_size)
    key = "rate_agent_" + agent + time.strftime("_%S", time_slice)
    # add() is a no-op if the counter already exists; expire just past the
    # end of the window so stale counters clean themselves up.
    g.cache.add(key, 0, time=slice_size + 1)
    if g.cache.incr(key) > limit:
        request.environ['retry_after'] = retry_after
        abort(429)
appengine_re = re.compile(r'AppEngine-Google; \(\+http://code.google.com/appengine; appid: (?:dev|s)~([a-z0-9-]{6,30})\)\Z')
@@ -733,6 +738,7 @@ def abort_with_error(error):
class MinimalController(BaseController):
allow_stylesheets = False
defer_ratelimiting = False
def request_key(self):
# note that this references the cookie at request time, not
@@ -758,6 +764,65 @@ class MinimalController(BaseController):
def cached_response(self):
# NOTE(review): returns an empty body — presumably the payload served when
# a pagecache hit short-circuits the request; confirm against callers.
return ""
def run_sitewide_ratelimits(self):
"""Ratelimit users and add ratelimit headers to the response.
Headers added are:
X-Ratelimit-Used: Number of requests used in this period
X-Ratelimit-Remaining: Number of requests left to use
X-Ratelimit-Reset: Approximate number of seconds to end of period
This function only has an effect if one of
g.RL_SITEWIDE_ENABLED or g.RL_OAUTH_SITEWIDE_ENABLED
are set to 'true' in the app configuration
If the ratelimit is exceeded, a 429 response will be sent,
unless the app configuration has g.ENFORCE_RATELIMIT off.
Headers will be sent even on aborted requests.
"""
if c.cdn_cacheable or not is_api():
# No ratelimiting or headers for:
# * Web requests (HTML)
# * CDN requests (logged out via www.reddit.com)
return
elif c.oauth_user and g.RL_OAUTH_SITEWIDE_ENABLED:
max_reqs = g.RL_OAUTH_MAX_REQS
period = g.RL_OAUTH_RESET_SECONDS
# Convert client_id to ascii str for use as memcache key
client_id = c.oauth2_access_token.client_id.encode("ascii")
# OAuth2 ratelimits are per user-app combination
key = 'siterl-oauth-' + c.user._id36 + ":" + client_id
elif g.RL_SITEWIDE_ENABLED:
max_reqs = g.RL_MAX_REQS
period = g.RL_RESET_SECONDS
# API (non-oauth) limits are per-ip
key = 'siterl-api-' + request.ip
else:
# Not in a context where sitewide ratelimits are on
return
period_start, retry_after = _get_ratelimit_timeslice(period)
# Suffix the key with the window start so counts reset each period.
key += time.strftime("-%H%M%S", period_start)
# add() is a no-op when the counter exists; expire just past window end.
g.ratelimitcache.add(key, 0, time=retry_after + 1)
# Increment the key to track the current request
recent_reqs = g.ratelimitcache.incr(key)
# Clamp at zero: Used may exceed max when enforcement is disabled.
reqs_remaining = max(0, max_reqs - recent_reqs)
c.ratelimit_headers = {
"X-Ratelimit-Used": str(recent_reqs),
"X-Ratelimit-Reset": str(retry_after),
"X-Ratelimit-Remaining": str(reqs_remaining),
}
if reqs_remaining <= 0 and g.ENFORCE_RATELIMIT:
# For non-abort situations, the headers will be added in post(),
# to avoid including them in a pagecache
response.headers.update(c.ratelimit_headers)
abort(429)
def pre(self):
action = request.environ["pylons.routes_dict"].get("action")
if action:
@@ -785,6 +850,9 @@ class MinimalController(BaseController):
c.allow_loggedin_cache = False
c.allow_framing = False
c.cdn_cacheable = (request.via_cdn and
g.login_cookie not in request.cookies)
# the domain has to be set before Cookies get initialized
set_subreddit()
c.errors = ErrorSet()
@@ -799,6 +867,10 @@ class MinimalController(BaseController):
g.stats.count_string('user_agents', request.user_agent)
if not self.defer_ratelimiting:
self.run_sitewide_ratelimits()
c.request_timer.intermediate("minimal-ratelimits")
hooks.get_hook("reddit.request.minimal_begin").call()
def can_use_pagecache(self):
@@ -891,6 +963,9 @@ class MinimalController(BaseController):
pagecache_state = "disallowed"
response.headers["X-Reddit-Pagecache"] = pagecache_state
if c.ratelimit_headers:
response.headers.update(c.ratelimit_headers)
# send cookies
for k, v in c.cookies.iteritems():
if v.dirty:
@@ -979,6 +1054,8 @@ class MinimalController(BaseController):
class OAuth2ResourceController(MinimalController):
defer_ratelimiting = True
def authenticate_with_token(self):
set_extension(request.environ, "json")
set_content_type()
@@ -1150,6 +1227,9 @@ class RedditController(OAuth2ResourceController):
c.request_timer.intermediate("base-auth")
self.run_sitewide_ratelimits()
c.request_timer.intermediate("base-ratelimits")
c.over18 = over18()
set_obey_over18()

View File

@@ -125,7 +125,6 @@ class Globals(object):
'REPLY_AGE_LIMIT',
'REPORT_AGE_LIMIT',
'HOT_PAGE_AGE',
'RATELIMIT',
'QUOTA_THRESHOLD',
'ADMIN_COOKIE_TTL',
'ADMIN_COOKIE_MAX_IDLE',
@@ -154,6 +153,8 @@ class Globals(object):
'wiki_max_page_separators',
'min_promote_future',
'max_promote_future',
'RL_RESET_MINUTES',
'RL_OAUTH_RESET_MINUTES',
],
ConfigValue.float: [
@@ -161,6 +162,8 @@ class Globals(object):
'max_promote_bid',
'statsd_sample_rate',
'querycache_prune_chance',
'RL_AVG_REQ_PER_SEC',
'RL_OAUTH_AVG_REQ_PER_SEC',
],
ConfigValue.bool: [
@@ -187,6 +190,9 @@ class Globals(object):
'shard_link_vote_queues',
'shard_commentstree_queues',
'subreddit_stylesheets_static',
'ENFORCE_RATELIMIT',
'RL_SITEWIDE_ENABLED',
'RL_OAUTH_SITEWIDE_ENABLED',
],
ConfigValue.tuple: [
@@ -199,6 +205,7 @@ class Globals(object):
'pagecaches',
'memoizecaches',
'srmembercaches',
'ratelimitcaches',
'cassandra_seeds',
'admins',
'sponsors',
@@ -443,6 +450,15 @@ class Globals(object):
locale.setlocale(locale.LC_ALL, self.locale)
# Pre-calculate ratelimit values
self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
self.RL_RESET_SECONDS)
self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
self.RL_OAUTH_RESET_SECONDS)
self.startup_timer.intermediate("configuration")
################# ZOOKEEPER
@@ -497,6 +513,12 @@ class Globals(object):
num_clients=num_mc_clients,
)
ratelimitcaches = CMemcache(
self.ratelimitcaches,
min_compress_len=96,
num_clients=num_mc_clients,
)
# a smaller pool of caches used only for distributed locks.
# TODO: move this to ZooKeeper
self.lock_cache = CMemcache(self.lockcaches,
@@ -615,6 +637,10 @@ class Globals(object):
(localcache_cls(), srmembercaches))
cache_chains.update(srmembercache=self.srmembercache)
self.ratelimitcache = MemcacheChain(
(localcache_cls(), ratelimitcaches))
cache_chains.update(ratelimitcaches=self.ratelimitcache)
self.rendercache = MemcacheChain((
localcache_cls(),
rendercaches,

View File

@@ -90,12 +90,14 @@ class BaseController(WSGIController):
forwarded_for = environ.get('HTTP_X_FORWARDED_FOR', ())
remote_addr = environ.get('REMOTE_ADDR')
request.via_cdn = False
if (g.secrets["true_ip"]
and true_client_ip
and ip_hash
and hashlib.md5(true_client_ip + g.secrets["true_ip"]).hexdigest() \
== ip_hash.lower()):
request.ip = true_client_ip
request.via_cdn = True
elif g.trust_local_proxies and forwarded_for and is_local_address(remote_addr):
request.ip = forwarded_for.split(',')[-1]
else:

View File

@@ -1605,7 +1605,7 @@ class VRatelimit(Validator):
seconds = None):
to_set = {}
if seconds is None:
seconds = g.RATELIMIT*60
seconds = g.RL_RESET_SECONDS
expire_time = datetime.now(g.tz) + timedelta(seconds = seconds)
if rate_user and c.user_is_loggedin:
to_set['user' + str(c.user._id36)] = expire_time
@@ -1632,7 +1632,7 @@ class VDelay(Validator):
@classmethod
def record_violation(self, category, seconds = None, growfast=False):
if seconds is None:
seconds = g.RATELIMIT*60
seconds = g.RL_RESET_SECONDS
key = "VDelay-%s-%s" % (category, request.ip)
prev_violations = g.memcache.get(key)