* lock Thing queries run with write_cache to prevent concurrent runs of potentially expensive queries
* don't look up default_subreddits in languages that we never store
* start respecting Subreddit.allow_top again
* fix assert syntax in cache.py
This commit is contained in:
ketralnis
2009-07-17 10:51:44 -07:00
parent c612109da7
commit 045381ae32
4 changed files with 54 additions and 29 deletions

View File

@@ -291,14 +291,15 @@ def get_browser_langs():
for l in langs:
if ';' in l:
l = l.split(';')[0]
if l not in seen_langs:
if l not in seen_langs and l in g.languages:
browser_langs.append(l)
seen_langs.add(l)
if '-' in l:
l = l.split('-')[0]
if l not in seen_langs:
if l not in seen_langs and l in g.languages:
browser_langs.append(l)
seen_langs.add(l)
browser_langs.sort()
return browser_langs
def set_host_lang():

View File

@@ -162,8 +162,10 @@ class CacheChain(CacheUtils, local):
delete_multi = make_set_fn('delete_multi')
flush_all = make_set_fn('flush_all')
def get(self, key, default=None):
def get(self, key, default = None, local = True):
for c in self.caches:
if not local and isinstance(c,LocalCache):
continue
val = c.get(key, default)
if val is not None:
#update other caches
@@ -211,31 +213,31 @@ def sgm(cache, keys, miss_fn, prefix='', time=0):
def test_cache(cache):
#basic set/get
cache.set('1', 1)
assert(cache.get('1') == 1)
assert cache.get('1') == 1
#python data
cache.set('2', [1,2,3])
assert(cache.get('2') == [1,2,3])
assert cache.get('2') == [1,2,3]
#set multi, no prefix
cache.set_multi({'3':3, '4': 4})
assert(cache.get_multi(('3', '4')) == {'3':3, '4': 4})
assert cache.get_multi(('3', '4')) == {'3':3, '4': 4}
#set multi, prefix
cache.set_multi({'3':3, '4': 4}, prefix='p_')
assert(cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4})
assert(cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4})
assert cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4}
assert cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4}
#incr
cache.set('5', 1)
cache.set('6', 1)
cache.incr('5')
assert(cache.get('5'), 2)
assert cache.get('5') == 2
cache.incr('5',2)
assert(cache.get('5'), 4)
assert cache.get('5') == 4
cache.incr_multi(('5', '6'), 1)
assert(cache.get('5'), 5)
assert(cache.get('6'), 2)
assert cache.get('5') == 5
assert cache.get('6') == 2
# a cache that occasionally dumps itself to be used for long-running
# processes

View File

@@ -775,24 +775,40 @@ class Query(object):
if self._stats_collector:
self._stats_collector.add(self)
lst = None
if self._read_cache:
names = cache.get(self._iden())
if names is not None:
lst = Thing._by_fullname(names, data = self._data, return_dict = False)
def _retrieve():
return self._cursor().fetchall()
if lst is None:
#hit the db
lst = self._cursor().fetchall()
names = lst = []
if not self._read_cache:
lst = _retrieve()
else:
used_cache = True
names = cache.get(self._iden())
if names is None and not self._write_cache:
# it wasn't in the cache, and we're not going to
# replace it, so just hit the db
lst = _retrieve()
elif names is None and self._write_cache:
# it's not in the cache, and we have the power to
# update it, which we should do in a lock to prevent
# concurrent requests for the same data
with g.make_lock("lock_%s" % self._iden()):
# see if it was set while we were waiting for our
# lock
names = cache.get(self._iden(), local = False)
if not names:
lst = _retrieve()
cache.set(self._iden(),
[ x._fullname for x in lst ],
self._cache_time)
if self._write_cache and not used_cache:
names = tuple(i._fullname for i in lst)
cache.set(self._iden(), names, self._cache_time)
if names and not lst:
# we got our list of names from the cache, so we need to
# turn them back into Things
lst = Thing._by_fullname(names, data = self._data, return_dict = False)
for i in lst:
yield i
for item in lst:
yield item
class Things(Query):
def __init__(self, kind, *rules, **kw):

View File

@@ -276,9 +276,8 @@ class Subreddit(Thing, Printable):
by popularity"""
pop_reddits = Subreddit._query(Subreddit.c.type == ('public',
'restricted'),
# Subreddit.c.allow_top == True,
sort=desc('_downs'),
limit = limit,
limit = limit * 1.5 if limit else None,
data = True,
read_cache = True,
write_cache = True,
@@ -289,7 +288,14 @@ class Subreddit(Thing, Printable):
if not c.over18:
pop_reddits._filter(Subreddit.c.over_18 == False)
return list(pop_reddits)
# evaluate the query and remove the ones with
# allow_top==False. Note that because this filtering is done
# after the query is run, if there are a lot of top reddits
# with allow_top==False, we may return fewer than `limit`
# results.
srs = filter(lambda sr: sr.allow_top, pop_reddits)
return srs[:limit] if limit else srs
@classmethod
def default_subreddits(cls, ids = True, limit = g.num_default_reddits):