Move IP-based throttling to ZooKeeper.

This buys us two things: one fewer memcache roundtrip per request to
determine if the IP is throttled or not, and the ability to use CIDR for
specifying blocks.

Google's ipaddress-py library, in the form currently being modified
(6f231f50aace) for inclusion in the Python stdlib, is included here to
help us manipulate CIDR ranges etc.
This commit is contained in:
Neil Williams
2012-08-03 13:09:34 -07:00
parent 34e57011b6
commit ced53a257a
5 changed files with 2258 additions and 6 deletions

View File

@@ -27,7 +27,7 @@ from pylons.i18n import _
from pylons.i18n.translation import LanguageError
from r2.lib.base import BaseController, proxyurl
from r2.lib import pages, utils, filters, amqp, stats
from r2.lib.utils import http_utils, is_subdomain, UniqueIterator
from r2.lib.utils import http_utils, is_subdomain, UniqueIterator, is_throttled
from r2.lib.cache import LocalCache, make_key, MemcachedError
import random as rand
from r2.models.account import valid_cookie, FakeAccount, valid_feed, valid_admin_cookie
@@ -464,12 +464,9 @@ def ratelimit_agents():
if s and user_agent and s in user_agent:
ratelimit_agent(s)
# Look up the throttle marker for `key`: returns whatever g.cache holds
# under "throttle_" + key.
# NOTE(review): this looks like the cache-backed helper that this commit
# replaces with is_throttled -- confirm it is a removed line of the diff.
def throttled(key):
return g.cache.get("throttle_" + key)
def ratelimit_throttled():
# Calls abort(429) when the requesting IP is found to be throttled.
# request.ip is stripped of surrounding whitespace before the lookup.
ip = request.ip.strip()
# NOTE(review): the next two lines look like the old and the new version
# of the same check (diff markers are not visible here); only one `if`
# should exist in the real file -- confirm against the repository.
if throttled(ip):
if is_throttled(ip):
abort(429)

View File

@@ -28,6 +28,7 @@ import signal
from datetime import timedelta, datetime
from urlparse import urlparse
import json
from r2.lib.contrib import ipaddress
from sqlalchemy import engine
from sqlalchemy import event
from r2.lib.configparse import ConfigValue, ConfigValueParser
@@ -265,16 +266,21 @@ class Globals(object):
# [live_config] section of the ini file
zk_hosts = self.config.get("zookeeper_connection_string")
if zk_hosts:
from r2.lib.zookeeper import connect_to_zookeeper, LiveConfig
from r2.lib.zookeeper import (connect_to_zookeeper,
LiveConfig, LiveList)
zk_username = self.config["zookeeper_username"]
zk_password = self.config["zookeeper_password"]
self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
zk_password))
self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
self.throttles = LiveList(self.zookeeper, "/throttles",
map_fn=ipaddress.ip_network,
reduce_fn=ipaddress.collapse_addresses)
else:
parser = ConfigParser.RawConfigParser()
parser.read([self.config["__file__"]])
self.live_config = extract_live_config(parser, self.plugins)
self.throttles = tuple() # immutable since it's not real
self.lock_cache = CMemcache(self.lockcaches, num_clients=num_mc_clients)
self.make_lock = make_lock_factory(self.lock_cache)

File diff suppressed because it is too large Load Diff

View File

@@ -38,6 +38,7 @@ from pylons import g
from pylons.i18n import ungettext, _
from r2.lib.filters import _force_unicode, _force_utf8
from mako.filters import url_escape
from r2.lib.contrib import ipaddress
import snudown
from r2.lib.utils._utils import *
@@ -1383,3 +1384,12 @@ def summarize_markdown(md):
first_graf, sep, rest = md.partition("\n\n")
return first_graf[:500]
def is_throttled(address):
    """Tell whether an IP address falls inside any throttled network.

    `address` is parsed with ipaddress.ip_address and then tested for
    membership in each network yielded by g.throttles. Returns True on
    the first match and False when no network contains the address.

    """
    parsed = ipaddress.ip_address(address)
    return any(parsed in throttled_net for throttled_net in g.throttles)

View File

@@ -20,11 +20,14 @@
# Inc. All Rights Reserved.
###############################################################################
import os
import json
import urllib
import functools
from kazoo.client import KazooClient
from kazoo.security import make_digest_acl
from kazoo.exceptions import NoNodeException
def connect_to_zookeeper(hostlist, credentials):
@@ -68,3 +71,42 @@ class LiveConfig(object):
def __repr__(self):
# Debug representation: the class name followed by the repr of self.data.
return "<LiveConfig %r>" % self.data
class LiveList(object):
    """A collection of items shared between apps through ZooKeeper.

    Every item is represented by a child node of `root` whose name is the
    URL-quoted string form of the item. A children watch registered in
    the constructor rebuilds `self.data` each time it is called with the
    current child names: the names are unquoted, passed through `map_fn`,
    and the output of `reduce_fn` over them is stored as a list.

    """

    def __init__(self, client, root, map_fn=None, reduce_fn=lambda L: L):
        """Ensure `root` exists and start watching its children.

        client    -- ZooKeeper client; must provide make_acl, ensure_path,
                     delete and the ChildrenWatch decorator.
        root      -- path of the node whose children are the list items.
        map_fn    -- applied to each unquoted child name.
        reduce_fn -- applied to the mapped items as a whole; its result is
                     converted to a list and stored in self.data.

        """
        self.client = client
        self.root = root
        self.data = []

        root_acl = self.client.make_acl(read=True, create=True, delete=True)
        self.client.ensure_path(self.root, [root_acl])

        @client.ChildrenWatch(root)
        def watcher(children):
            # node names are stored quoted (see _nodepath); undo that first
            names = (urllib.unquote(child) for child in children)
            # NOTE: with the default map_fn=None this relies on Python 2's
            # map(None, seq) behaving as the identity function.
            self.data = list(reduce_fn(map(map_fn, names)))

    def _nodepath(self, item):
        """Return the path of the child node that represents `item`."""
        node_name = urllib.quote(str(item), safe=":")
        return os.path.join(self.root, node_name)

    def add(self, item):
        """Make sure a node for `item` exists under the root."""
        self.client.ensure_path(self._nodepath(item))

    def remove(self, item):
        """Delete the node for `item`.

        Raises ValueError if there is no such node.

        """
        target = self._nodepath(item)
        try:
            self.client.delete(target)
        except NoNodeException:
            raise ValueError("not in list")

    def __iter__(self):
        """Iterate over the most recently received contents."""
        return iter(self.data)

    def __repr__(self):
        return "<LiveList %r>" % self.data