mirror of
https://github.com/reddit-archive/reddit.git
synced 2026-04-05 03:00:15 -04:00
Move IP-based throttling to ZooKeeper.
This buys us two things: one fewer memcache round trip per request to determine whether the IP is throttled, and the ability to specify blocks using CIDR notation. Google's ipaddress-py library, in the form currently being modified for inclusion in the Python stdlib (revision 6f231f50aace), is included here to help us manipulate CIDR ranges etc.
This commit is contained in:
@@ -27,7 +27,7 @@ from pylons.i18n import _
|
||||
from pylons.i18n.translation import LanguageError
|
||||
from r2.lib.base import BaseController, proxyurl
|
||||
from r2.lib import pages, utils, filters, amqp, stats
|
||||
from r2.lib.utils import http_utils, is_subdomain, UniqueIterator
|
||||
from r2.lib.utils import http_utils, is_subdomain, UniqueIterator, is_throttled
|
||||
from r2.lib.cache import LocalCache, make_key, MemcachedError
|
||||
import random as rand
|
||||
from r2.models.account import valid_cookie, FakeAccount, valid_feed, valid_admin_cookie
|
||||
@@ -464,12 +464,9 @@ def ratelimit_agents():
|
||||
if s and user_agent and s in user_agent:
|
||||
ratelimit_agent(s)
|
||||
|
||||
def throttled(key):
|
||||
return g.cache.get("throttle_" + key)
|
||||
|
||||
def ratelimit_throttled():
|
||||
ip = request.ip.strip()
|
||||
if throttled(ip):
|
||||
if is_throttled(ip):
|
||||
abort(429)
|
||||
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ import signal
|
||||
from datetime import timedelta, datetime
|
||||
from urlparse import urlparse
|
||||
import json
|
||||
from r2.lib.contrib import ipaddress
|
||||
from sqlalchemy import engine
|
||||
from sqlalchemy import event
|
||||
from r2.lib.configparse import ConfigValue, ConfigValueParser
|
||||
@@ -265,16 +266,21 @@ class Globals(object):
|
||||
# [live_config] section of the ini file
|
||||
zk_hosts = self.config.get("zookeeper_connection_string")
|
||||
if zk_hosts:
|
||||
from r2.lib.zookeeper import connect_to_zookeeper, LiveConfig
|
||||
from r2.lib.zookeeper import (connect_to_zookeeper,
|
||||
LiveConfig, LiveList)
|
||||
zk_username = self.config["zookeeper_username"]
|
||||
zk_password = self.config["zookeeper_password"]
|
||||
self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
|
||||
zk_password))
|
||||
self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
|
||||
self.throttles = LiveList(self.zookeeper, "/throttles",
|
||||
map_fn=ipaddress.ip_network,
|
||||
reduce_fn=ipaddress.collapse_addresses)
|
||||
else:
|
||||
parser = ConfigParser.RawConfigParser()
|
||||
parser.read([self.config["__file__"]])
|
||||
self.live_config = extract_live_config(parser, self.plugins)
|
||||
self.throttles = tuple() # immutable since it's not real
|
||||
|
||||
self.lock_cache = CMemcache(self.lockcaches, num_clients=num_mc_clients)
|
||||
self.make_lock = make_lock_factory(self.lock_cache)
|
||||
|
||||
2197
r2/r2/lib/contrib/ipaddress.py
Normal file
2197
r2/r2/lib/contrib/ipaddress.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -38,6 +38,7 @@ from pylons import g
|
||||
from pylons.i18n import ungettext, _
|
||||
from r2.lib.filters import _force_unicode, _force_utf8
|
||||
from mako.filters import url_escape
|
||||
from r2.lib.contrib import ipaddress
|
||||
import snudown
|
||||
|
||||
from r2.lib.utils._utils import *
|
||||
@@ -1383,3 +1384,12 @@ def summarize_markdown(md):
|
||||
|
||||
first_graf, sep, rest = md.partition("\n\n")
|
||||
return first_graf[:500]
|
||||
|
||||
|
||||
def is_throttled(address):
    """Determine if an IP address is in a throttled range.

    `address` is parsed with `ipaddress.ip_address` and then tested for
    membership in each entry of `g.throttles`.

    Returns True if any entry contains the address.  Returns False
    otherwise, including when `address` cannot be parsed as an IP address.

    """
    try:
        addr = ipaddress.ip_address(address)
    except ValueError:
        # An unparseable address cannot fall inside any network, so report
        # it as not throttled instead of letting the parse error escape.
        return False

    return any(addr in network for network in g.throttles)
|
||||
|
||||
@@ -20,11 +20,14 @@
|
||||
# Inc. All Rights Reserved.
|
||||
###############################################################################
|
||||
|
||||
import os
|
||||
import json
|
||||
import urllib
|
||||
import functools
|
||||
|
||||
from kazoo.client import KazooClient
|
||||
from kazoo.security import make_digest_acl
|
||||
from kazoo.exceptions import NoNodeException
|
||||
|
||||
|
||||
def connect_to_zookeeper(hostlist, credentials):
|
||||
@@ -68,3 +71,42 @@ class LiveConfig(object):
|
||||
|
||||
def __repr__(self):
|
||||
return "<LiveConfig %r>" % self.data
|
||||
|
||||
|
||||
class LiveList(object):
    """A mutable collection shared by all apps and backed by ZooKeeper.

    Each item is stored as a child node of `root`, named with the
    URL-quoted string form of the item.  A children watch registered in
    the constructor rebuilds `self.data` from the child node names every
    time it is invoked.

    """

    def __init__(self, client, root, map_fn=None, reduce_fn=lambda L: L):
        """Ensure the root node exists and start watching its children.

        client -- ZooKeeper client; must provide make_acl, ensure_path,
                  delete and ChildrenWatch.
        root -- path of the node whose children are the list's items.
        map_fn -- optional callable applied to each unquoted child name.
                  When None, the names are used as-is.
        reduce_fn -- callable applied to the list of mapped items; its
                     result is copied into a list and stored as the data.

        """
        self.client = client
        self.root = root
        self.data = []

        acl = [self.client.make_acl(read=True, create=True, delete=True)]
        self.client.ensure_path(self.root, acl)

        # NOTE(review): presumably kazoo calls this on registration and again
        # whenever the set of children changes -- confirm against kazoo docs.
        @client.ChildrenWatch(root)
        def watcher(children):
            names = [urllib.unquote(child) for child in children]
            # Handle the "no mapping" case explicitly rather than relying on
            # map(None, ...) acting as the identity function.
            if map_fn is None:
                mapped = names
            else:
                mapped = [map_fn(name) for name in names]
            self.data = list(reduce_fn(mapped))

    def _nodepath(self, item):
        """Return the path of the child node that represents `item`."""
        # Only ":" is left unquoted; "/" is escaped, so an item containing
        # a slash still maps to a single child node name.
        escaped = urllib.quote(str(item), safe=":")
        return os.path.join(self.root, escaped)

    def add(self, item):
        """Ensure a child node for `item` exists under the root."""
        path = self._nodepath(item)
        self.client.ensure_path(path)

    def remove(self, item):
        """Delete the child node for `item`.

        Raises ValueError if the client reports that the node does not
        exist.

        """
        path = self._nodepath(item)

        try:
            self.client.delete(path)
        except NoNodeException:
            raise ValueError("not in list")

    def __iter__(self):
        """Iterate over the most recently built list of items."""
        return iter(self.data)

    def __repr__(self):
        return "<LiveList %r>" % self.data
|
||||
|
||||
Reference in New Issue
Block a user