From 045381ae32ca690b13339c9aa84c73633f2bb0ac Mon Sep 17 00:00:00 2001
From: ketralnis
Date: Fri, 17 Jul 2009 10:51:44 -0700
Subject: [PATCH] * lock Thing queries run with write_cache to prevent
 concurrent runs of potentially expensive queries

* don't look up default_subreddits in languages that we never store
* start respecting Subreddit.allow_top again
* fix assert syntax in cache.py: the incr tests used the two-argument
  tuple form, e.g. assert(cache.get('5'), 2), which is always true and
  so could never fail

---
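Note: the thing.py change below is a double-checked locking pattern
around the cache fill. A minimal standalone sketch of the idea, using
threading.Lock in place of reddit's cross-process g.make_lock; the
cached_query/retrieve names are illustrative, not the real API:

    import threading

    cache = {}                     # stands in for the memcached chain
    fill_lock = threading.Lock()   # g.make_lock() is cross-process

    def cached_query(iden, retrieve):
        names = cache.get(iden)
        if names is not None:
            return names           # fast path: result already cached
        with fill_lock:
            # re-check: another caller may have filled the cache while
            # we were waiting for the lock
            names = cache.get(iden)
            if names is None:
                names = retrieve() # expensive query runs at most once
                cache[iden] = names
        return names

Only one concurrent caller pays for the expensive query; the rest either
hit the fast path or find the result already cached once they acquire
the lock.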
 r2/r2/controllers/reddit_base.py |  5 ++--
 r2/r2/lib/cache.py               | 22 ++++++++--------
 r2/r2/lib/db/thing.py            | 44 ++++++++++++++++++++++----------
 r2/r2/models/subreddit.py        | 12 ++++++---
 4 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/r2/r2/controllers/reddit_base.py b/r2/r2/controllers/reddit_base.py
index d55710673..c9a2f5083 100644
--- a/r2/r2/controllers/reddit_base.py
+++ b/r2/r2/controllers/reddit_base.py
@@ -291,14 +291,15 @@ def get_browser_langs():
         for l in langs:
             if ';' in l:
                 l = l.split(';')[0]
-            if l not in seen_langs:
+            if l not in seen_langs and l in g.languages:
                 browser_langs.append(l)
                 seen_langs.add(l)
             if '-' in l:
                 l = l.split('-')[0]
-            if l not in seen_langs:
+            if l not in seen_langs and l in g.languages:
                 browser_langs.append(l)
                 seen_langs.add(l)
+    browser_langs.sort()
     return browser_langs
 
 def set_host_lang():
diff --git a/r2/r2/lib/cache.py b/r2/r2/lib/cache.py
index d887a173e..d63052941 100644
--- a/r2/r2/lib/cache.py
+++ b/r2/r2/lib/cache.py
@@ -162,8 +162,10 @@ class CacheChain(CacheUtils, local):
     delete_multi = make_set_fn('delete_multi')
     flush_all = make_set_fn('flush_all')
 
-    def get(self, key, default=None):
+    def get(self, key, default = None, local = True):
         for c in self.caches:
+            if not local and isinstance(c, LocalCache):
+                continue
             val = c.get(key, default)
             if val is not None:
                 #update other caches
@@ -211,31 +213,31 @@ def sgm(cache, keys, miss_fn, prefix='', time=0):
 def test_cache(cache):
     #basic set/get
     cache.set('1', 1)
-    assert(cache.get('1') == 1)
+    assert cache.get('1') == 1
 
     #python data
     cache.set('2', [1,2,3])
-    assert(cache.get('2') == [1,2,3])
+    assert cache.get('2') == [1,2,3]
 
     #set multi, no prefix
     cache.set_multi({'3':3, '4': 4})
-    assert(cache.get_multi(('3', '4')) == {'3':3, '4': 4})
+    assert cache.get_multi(('3', '4')) == {'3':3, '4': 4}
 
     #set multi, prefix
     cache.set_multi({'3':3, '4': 4}, prefix='p_')
-    assert(cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4})
-    assert(cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4})
+    assert cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4}
+    assert cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4}
 
     #incr
     cache.set('5', 1)
     cache.set('6', 1)
     cache.incr('5')
-    assert(cache.get('5'), 2)
+    assert cache.get('5') == 2
     cache.incr('5',2)
-    assert(cache.get('5'), 4)
+    assert cache.get('5') == 4
     cache.incr_multi(('5', '6'), 1)
-    assert(cache.get('5'), 5)
-    assert(cache.get('6'), 2)
+    assert cache.get('5') == 5
+    assert cache.get('6') == 2
 
 # a cache that occasionally dumps itself to be used for long-running
 # processes
diff --git a/r2/r2/lib/db/thing.py b/r2/r2/lib/db/thing.py
index 1f325a522..f20dca7e2 100644
--- a/r2/r2/lib/db/thing.py
+++ b/r2/r2/lib/db/thing.py
@@ -775,24 +775,40 @@ class Query(object):
         if self._stats_collector:
             self._stats_collector.add(self)
 
-        lst = None
-        if self._read_cache:
-            names = cache.get(self._iden())
-            if names is not None:
-                lst = Thing._by_fullname(names, data = self._data, return_dict = False)
+        def _retrieve():
+            return self._cursor().fetchall()
 
-        if lst is None:
-            #hit the db
-            lst = self._cursor().fetchall()
+        names = lst = []
+
+        if not self._read_cache:
+            lst = _retrieve()
         else:
-            used_cache = True
+            names = cache.get(self._iden())
+            if names is None and not self._write_cache:
+                # it wasn't in the cache, and we're not going to
+                # replace it, so just hit the db
+                lst = _retrieve()
+            elif names is None and self._write_cache:
+                # it's not in the cache, and we have the power to
+                # update it, which we should do in a lock to prevent
+                # concurrent requests for the same data
+                with g.make_lock("lock_%s" % self._iden()):
+                    # see if it was set while we were waiting for our
+                    # lock
+                    names = cache.get(self._iden(), local = False)
+                    if not names:
+                        lst = _retrieve()
+                        cache.set(self._iden(),
+                                  [ x._fullname for x in lst ],
+                                  self._cache_time)
 
-        if self._write_cache and not used_cache:
-            names = tuple(i._fullname for i in lst)
-            cache.set(self._iden(), names, self._cache_time)
+        if names and not lst:
+            # we got our list of names from the cache, so we need to
+            # turn them back into Things
+            lst = Thing._by_fullname(names, data = self._data, return_dict = False)
 
-        for i in lst:
-            yield i
+        for item in lst:
+            yield item
 
 class Things(Query):
     def __init__(self, kind, *rules, **kw):
diff --git a/r2/r2/models/subreddit.py b/r2/r2/models/subreddit.py
index 1d3c19c88..018aca1f7 100644
--- a/r2/r2/models/subreddit.py
+++ b/r2/r2/models/subreddit.py
@@ -276,9 +276,8 @@ class Subreddit(Thing, Printable):
            by popularity"""
         pop_reddits = Subreddit._query(Subreddit.c.type == ('public', 'restricted'),
-                                       # Subreddit.c.allow_top == True,
                                        sort=desc('_downs'),
-                                       limit = limit,
+                                       limit = int(limit * 1.5) if limit else None,
                                        data = True,
                                        read_cache = True,
                                        write_cache = True,
@@ -289,7 +288,14 @@ class Subreddit(Thing, Printable):
         if not c.over18:
             pop_reddits._filter(Subreddit.c.over_18 == False)
 
-        return list(pop_reddits)
+        # evaluate the query and remove the ones with
+        # allow_top == False. Note that because this filtering is done
+        # after the query is run, if there are a lot of top reddits
+        # with allow_top == False, we may return fewer than `limit`
+        # results.
+        srs = filter(lambda sr: sr.allow_top, pop_reddits)
+
+        return srs[:limit] if limit else srs
 
     @classmethod
     def default_subreddits(cls, ids = True, limit = g.num_default_reddits):
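
Note on the local = False read that the lock path in thing.py relies on:
the re-check inside the lock must bypass the in-process LocalCache,
because the value it is looking for would have been written to memcached
by another process while we waited, and the local layer cannot have seen
that write. A minimal sketch of the chain behaviour, with LocalCache and
Memcache as illustrative dict stand-ins rather than the real classes
(the real chain also back-fills earlier caches on a hit, omitted here):

    class LocalCache(dict):
        pass

    class Memcache(dict):
        pass

    class CacheChain(object):
        def __init__(self, *caches):
            self.caches = caches

        def get(self, key, default = None, local = True):
            for c in self.caches:
                # when local is False, skip the per-process layer and
                # go straight to the shared caches
                if not local and isinstance(c, LocalCache):
                    continue
                val = c.get(key)
                if val is not None:
                    return val
            return default

    chain = CacheChain(LocalCache(), Memcache())
    chain.caches[0]['k'] = 'stale local value'
    chain.caches[1]['k'] = 'written by another process'
    assert chain.get('k') == 'stale local value'
    assert chain.get('k', local = False) == 'written by another process'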