diff --git a/r2/example.ini b/r2/example.ini
index 26a97e19d..a6e30ec41 100644
--- a/r2/example.ini
+++ b/r2/example.ini
@@ -204,12 +204,7 @@ embedly_api_key =
 
 ############################################ QUOTAS
 
-# rate limiter duration (minutes)
-RATELIMIT = 10
-# user agent substrings to hard-ratelimit to a number of requests per ten second period
-# example: agents = googlebot:10, appengine:2
-agents =
-# ratelimits for various types of relations creatable in subreddits
+# quota for various types of relations creatable in subreddits
 sr_banned_quota = 10000
 sr_moderator_invite_quota = 10000
 sr_contributor_quota = 10000
@@ -223,14 +218,39 @@ new_link_share_delay = 30 seconds
 max_sr_images = 50
 
+############################################ RATELIMITS
+
+# If true, send 429 responses on exceeded ratelimits
+# If false, send headers only, but don't abort
+# Only applies if tracking is enabled below
+ENFORCE_RATELIMIT = false
+
+# If true, store per-user request counts in ratelimits cache
+RL_SITEWIDE_ENABLED = true
+# How large of a burst window will users be allowed?
+RL_RESET_MINUTES = 10
+# What is the average request rate over the above time period?
+RL_AVG_REQ_PER_SEC = 0.5
+
+# Same as above, but configured separately for connections via OAuth
+RL_OAUTH_SITEWIDE_ENABLED = true
+RL_OAUTH_RESET_MINUTES = 10
+RL_OAUTH_AVG_REQ_PER_SEC = 0.5
+
+# user agent substrings to hard-ratelimit to a number of requests per ten second period
+# example: agents = googlebot:10, appengine:2
+agents =
+
+# karma needed to avoid per-subreddit submission ratelimits
+MIN_RATE_LIMIT_KARMA = 10
+MIN_RATE_LIMIT_COMMENT_KARMA = 1
+
+
 ############################################ THRESHOLDS
 
 # minimum item score to be considered for quota baskets
 QUOTA_THRESHOLD = 5
 # if the user has positive total karma, their per-subreddit karma will default to this, else 0
 MIN_UP_KARMA = 1
-# karma needed to avoid per-subreddit submission ratelimits
-MIN_RATE_LIMIT_KARMA = 10
-MIN_RATE_LIMIT_COMMENT_KARMA = 1
 # ages in days at which various actions are disallowed to preserve history
 REPLY_AGE_LIMIT = 180
 VOTE_AGE_LIMIT = 180
@@ -295,6 +315,8 @@ permacache_memcaches = 127.0.0.1:11211
 srmembercaches = 127.0.0.1:11211
 # a local cache that's not globally consistent and can have stale data (optional)
 stalecaches =
+# cache for tracking rate limit thresholds
+ratelimitcaches = 127.0.0.1:11211
 
 ############################################ MISCELLANEOUS
 
diff --git a/r2/r2/controllers/apiv1.py b/r2/r2/controllers/apiv1.py
index d77e728be..7e37aac05 100644
--- a/r2/r2/controllers/apiv1.py
+++ b/r2/r2/controllers/apiv1.py
@@ -30,6 +30,7 @@ class APIv1Controller(OAuth2ResourceController):
     def pre(self):
         OAuth2ResourceController.pre(self)
         self.check_for_bearer_token()
+        self.run_sitewide_ratelimits()
 
     def try_pagecache(self):
         pass
diff --git a/r2/r2/controllers/reddit_base.py b/r2/r2/controllers/reddit_base.py
index 9640ea930..b8fa217aa 100644
--- a/r2/r2/controllers/reddit_base.py
+++ b/r2/r2/controllers/reddit_base.py
@@ -570,15 +570,20 @@ def set_colors():
     c.bordercolor = request.GET.get('bordercolor')
 
 
+def _get_ratelimit_timeslice(slice_seconds):
+    slice_start, secs_since = divmod(time.time(), slice_seconds)
+    slice_start = time.gmtime(int(slice_start * slice_seconds))
+    secs_to_next = slice_seconds - int(secs_since)
+    return slice_start, secs_to_next
+
+
 def ratelimit_agent(agent, limit=10, slice_size=10):
     slice_size = min(slice_size, 60)
-    slice, remainder = map(int, divmod(time.time(), slice_size))
-    time_slice = time.gmtime(slice * slice_size)
+    time_slice, retry_after = _get_ratelimit_timeslice(slice_size)
     key = "rate_agent_" + agent + time.strftime("_%S", time_slice)
-    g.cache.add(key, 0, time=slice_size + 1)
     if g.cache.incr(key) > limit:
-        request.environ['retry_after'] = slice_size - remainder
+        request.environ['retry_after'] = retry_after
         abort(429)
 
 
 appengine_re = re.compile(r'AppEngine-Google; \(\+http://code.google.com/appengine; appid: (?:dev|s)~([a-z0-9-]{6,30})\)\Z')
@@ -733,6 +738,7 @@ def abort_with_error(error):
 class MinimalController(BaseController):
 
     allow_stylesheets = False
+    defer_ratelimiting = False
 
     def request_key(self):
         # note that this references the cookie at request time, not
@@ -758,6 +764,65 @@ class MinimalController(BaseController):
     def cached_response(self):
         return ""
 
+    def run_sitewide_ratelimits(self):
+        """Ratelimit users and add ratelimit headers to the response.
+
+        Headers added are:
+            X-Ratelimit-Used: Number of requests used in this period
+            X-Ratelimit-Remaining: Number of requests left to use
+            X-Ratelimit-Reset: Approximate number of seconds to end of period
+
+        This function only has an effect if one of
+        g.RL_SITEWIDE_ENABLED or g.RL_OAUTH_SITEWIDE_ENABLED
+        are set to 'true' in the app configuration
+
+        If the ratelimit is exceeded, a 429 response will be sent,
+        unless the app configuration has g.ENFORCE_RATELIMIT off.
+        Headers will be sent even on aborted requests.
+
+        """
+        if c.cdn_cacheable or not is_api():
+            # No ratelimiting or headers for:
+            #   * Web requests (HTML)
+            #   * CDN requests (logged out via www.reddit.com)
+            return
+        elif c.oauth_user and g.RL_OAUTH_SITEWIDE_ENABLED:
+            max_reqs = g.RL_OAUTH_MAX_REQS
+            period = g.RL_OAUTH_RESET_SECONDS
+            # Convert client_id to ascii str for use as memcache key
+            client_id = c.oauth2_access_token.client_id.encode("ascii")
+            # OAuth2 ratelimits are per user-app combination
+            key = 'siterl-oauth-' + c.user._id36 + ":" + client_id
+        elif g.RL_SITEWIDE_ENABLED:
+            max_reqs = g.RL_MAX_REQS
+            period = g.RL_RESET_SECONDS
+            # API (non-oauth) limits are per-ip
+            key = 'siterl-api-' + request.ip
+        else:
+            # Not in a context where sitewide ratelimits are on
+            return
+
+        period_start, retry_after = _get_ratelimit_timeslice(period)
+        key += time.strftime("-%H%M%S", period_start)
+
+        g.ratelimitcache.add(key, 0, time=retry_after + 1)
+
+        # Increment the key to track the current request
+        recent_reqs = g.ratelimitcache.incr(key)
+        reqs_remaining = max(0, max_reqs - recent_reqs)
+
+        c.ratelimit_headers = {
+            "X-Ratelimit-Used": str(recent_reqs),
+            "X-Ratelimit-Reset": str(retry_after),
+            "X-Ratelimit-Remaining": str(reqs_remaining),
+        }
+
+        if reqs_remaining <= 0 and g.ENFORCE_RATELIMIT:
+            # For non-abort situations, the headers will be added in post(),
+            # to avoid including them in a pagecache
+            response.headers.update(c.ratelimit_headers)
+            abort(429)
+
     def pre(self):
         action = request.environ["pylons.routes_dict"].get("action")
         if action:
@@ -785,6 +850,9 @@
         c.allow_loggedin_cache = False
         c.allow_framing = False
 
+        c.cdn_cacheable = (request.via_cdn and
+                           g.login_cookie not in request.cookies)
+
         # the domain has to be set before Cookies get initialized
         set_subreddit()
         c.errors = ErrorSet()
@@ -799,6 +867,10 @@
 
         g.stats.count_string('user_agents', request.user_agent)
 
+        if not self.defer_ratelimiting:
+            self.run_sitewide_ratelimits()
+            c.request_timer.intermediate("minimal-ratelimits")
+
         hooks.get_hook("reddit.request.minimal_begin").call()
 
     def can_use_pagecache(self):
@@ -891,6 +963,9 @@
             pagecache_state = "disallowed"
         response.headers["X-Reddit-Pagecache"] = pagecache_state
 
+        if c.ratelimit_headers:
+            response.headers.update(c.ratelimit_headers)
+
         # send cookies
         for k, v in c.cookies.iteritems():
             if v.dirty:
@@ -979,6 +1054,8 @@
 
 
 class OAuth2ResourceController(MinimalController):
+    defer_ratelimiting = True
+
     def authenticate_with_token(self):
         set_extension(request.environ, "json")
         set_content_type()
@@ -1150,6 +1227,9 @@
 
         c.request_timer.intermediate("base-auth")
 
+        self.run_sitewide_ratelimits()
+        c.request_timer.intermediate("base-ratelimits")
+
         c.over18 = over18()
         set_obey_over18()
 
diff --git a/r2/r2/lib/app_globals.py b/r2/r2/lib/app_globals.py
index 87bf0e49c..7b34390a6 100755
--- a/r2/r2/lib/app_globals.py
+++ b/r2/r2/lib/app_globals.py
@@ -125,7 +125,6 @@ class Globals(object):
             'REPLY_AGE_LIMIT',
             'REPORT_AGE_LIMIT',
             'HOT_PAGE_AGE',
-            'RATELIMIT',
             'QUOTA_THRESHOLD',
             'ADMIN_COOKIE_TTL',
             'ADMIN_COOKIE_MAX_IDLE',
@@ -154,6 +153,8 @@
             'wiki_max_page_separators',
             'min_promote_future',
             'max_promote_future',
+            'RL_RESET_MINUTES',
+            'RL_OAUTH_RESET_MINUTES',
         ],
 
         ConfigValue.float: [
@@ -161,6 +162,8 @@
            'min_promote_bid',
            'max_promote_bid',
            'statsd_sample_rate',
            'querycache_prune_chance',
+           'RL_AVG_REQ_PER_SEC',
+           'RL_OAUTH_AVG_REQ_PER_SEC',
         ],
 
        ConfigValue.bool: [
@@ -187,6 +190,9 @@
             'shard_link_vote_queues',
             'shard_commentstree_queues',
             'subreddit_stylesheets_static',
+            'ENFORCE_RATELIMIT',
+            'RL_SITEWIDE_ENABLED',
+            'RL_OAUTH_SITEWIDE_ENABLED',
         ],
 
         ConfigValue.tuple: [
@@ -199,6 +205,7 @@
             'pagecaches',
             'memoizecaches',
             'srmembercaches',
+            'ratelimitcaches',
             'cassandra_seeds',
             'admins',
             'sponsors',
@@ -443,6 +450,15 @@
 
         locale.setlocale(locale.LC_ALL, self.locale)
 
+        # Pre-calculate ratelimit values
+        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
+        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
+                               self.RL_RESET_SECONDS)
+
+        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
+        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
+                                     self.RL_OAUTH_RESET_SECONDS)
+
         self.startup_timer.intermediate("configuration")
 
         ################# ZOOKEEPER
@@ -497,6 +513,12 @@
             num_clients=num_mc_clients,
         )
 
+        ratelimitcaches = CMemcache(
+            self.ratelimitcaches,
+            min_compress_len=96,
+            num_clients=num_mc_clients,
+        )
+
         # a smaller pool of caches used only for distributed locks.
         # TODO: move this to ZooKeeper
         self.lock_cache = CMemcache(self.lockcaches,
@@ -615,6 +637,10 @@
             (localcache_cls(), srmembercaches))
         cache_chains.update(srmembercache=self.srmembercache)
 
+        self.ratelimitcache = MemcacheChain(
+            (localcache_cls(), ratelimitcaches))
+        cache_chains.update(ratelimitcaches=self.ratelimitcache)
+
         self.rendercache = MemcacheChain((
             localcache_cls(),
             rendercaches,
diff --git a/r2/r2/lib/base.py b/r2/r2/lib/base.py
index c0eb00a4b..ed07d5685 100644
--- a/r2/r2/lib/base.py
+++ b/r2/r2/lib/base.py
@@ -90,12 +90,14 @@
         forwarded_for = environ.get('HTTP_X_FORWARDED_FOR', ())
         remote_addr = environ.get('REMOTE_ADDR')
 
+        request.via_cdn = False
         if (g.secrets["true_ip"]
             and true_client_ip
             and ip_hash
             and hashlib.md5(true_client_ip + g.secrets["true_ip"]).hexdigest() \
                 == ip_hash.lower()):
             request.ip = true_client_ip
+            request.via_cdn = True
         elif g.trust_local_proxies and forwarded_for and is_local_address(remote_addr):
             request.ip = forwarded_for.split(',')[-1]
         else:
diff --git a/r2/r2/lib/validator/validator.py b/r2/r2/lib/validator/validator.py
index 6b94ef653..5a66794a3 100644
--- a/r2/r2/lib/validator/validator.py
+++ b/r2/r2/lib/validator/validator.py
@@ -1605,7 +1605,7 @@
                   seconds = None):
         to_set = {}
         if seconds is None:
-            seconds = g.RATELIMIT*60
+            seconds = g.RL_RESET_SECONDS
         expire_time = datetime.now(g.tz) + timedelta(seconds = seconds)
         if rate_user and c.user_is_loggedin:
             to_set['user' + str(c.user._id36)] = expire_time
@@ -1632,7 +1632,7 @@
 class VDelay(Validator):
     @classmethod
     def record_violation(self, category, seconds = None, growfast=False):
         if seconds is None:
-            seconds = g.RATELIMIT*60
+            seconds = g.RL_RESET_SECONDS
         key = "VDelay-%s-%s" % (category, request.ip)
         prev_violations = g.memcache.get(key)