From 045381ae32ca690b13339c9aa84c73633f2bb0ac Mon Sep 17 00:00:00 2001
From: ketralnis
Date: Fri, 17 Jul 2009 10:51:44 -0700
Subject: [PATCH] * lock Thing queries run with write_cache to prevent
 concurrent runs of potentially expensive queries

* don't look up default_subreddits in languages that we never store
* start respecting Subreddit.allow_top again
* fix assert syntax in cache.py: the incr tests used the two-argument
  tuple form, e.g. assert(cache.get('5'), 2), which is always true and
  so could never fail

---
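Note: the thing.py change below is a double-checked locking pattern
around the cache fill. A minimal standalone sketch of the idea, using
threading.Lock in place of reddit's cross-process g.make_lock; the
cached_query/retrieve names are illustrative, not the real API:

    import threading

    cache = {}                     # stands in for the memcached chain
    fill_lock = threading.Lock()   # g.make_lock() is cross-process

    def cached_query(iden, retrieve):
        names = cache.get(iden)
        if names is not None:
            return names           # fast path: result already cached
        with fill_lock:
            # re-check: another caller may have filled the cache while
            # we were waiting for the lock
            names = cache.get(iden)
            if names is None:
                names = retrieve() # expensive query runs at most once
                cache[iden] = names
        return names

Only one concurrent caller pays for the expensive query; the rest either
hit the fast path or find the result already cached once they acquire
the lock.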
 r2/r2/controllers/reddit_base.py |  5 ++--
 r2/r2/lib/cache.py               | 22 ++++++++--------
 r2/r2/lib/db/thing.py            | 44 ++++++++++++++++++++++----------
 r2/r2/models/subreddit.py        | 12 ++++++---
 4 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/r2/r2/controllers/reddit_base.py b/r2/r2/controllers/reddit_base.py
index d55710673..c9a2f5083 100644
--- a/r2/r2/controllers/reddit_base.py
+++ b/r2/r2/controllers/reddit_base.py
@@ -291,14 +291,15 @@ def get_browser_langs():
         for l in langs:
             if ';' in l:
                 l = l.split(';')[0]
-            if l not in seen_langs:
+            if l not in seen_langs and l in g.languages:
                 browser_langs.append(l)
                 seen_langs.add(l)
             if '-' in l:
                 l = l.split('-')[0]
-            if l not in seen_langs:
+            if l not in seen_langs and l in g.languages:
                 browser_langs.append(l)
                 seen_langs.add(l)
+    browser_langs.sort()
     return browser_langs
 
 def set_host_lang():
diff --git a/r2/r2/lib/cache.py b/r2/r2/lib/cache.py
index d887a173e..d63052941 100644
--- a/r2/r2/lib/cache.py
+++ b/r2/r2/lib/cache.py
@@ -162,8 +162,10 @@ class CacheChain(CacheUtils, local):
     delete_multi = make_set_fn('delete_multi')
     flush_all = make_set_fn('flush_all')
 
-    def get(self, key, default=None):
+    def get(self, key, default = None, local = True):
         for c in self.caches:
+            if not local and isinstance(c, LocalCache):
+                continue
             val = c.get(key, default)
             if val is not None:
                 #update other caches
@@ -211,31 +213,31 @@ def sgm(cache, keys, miss_fn, prefix='', time=0):
 def test_cache(cache):
     #basic set/get
     cache.set('1', 1)
-    assert(cache.get('1') == 1)
+    assert cache.get('1') == 1
 
     #python data
     cache.set('2', [1,2,3])
-    assert(cache.get('2') == [1,2,3])
+    assert cache.get('2') == [1,2,3]
 
     #set multi, no prefix
     cache.set_multi({'3':3, '4': 4})
-    assert(cache.get_multi(('3', '4')) == {'3':3, '4': 4})
+    assert cache.get_multi(('3', '4')) == {'3':3, '4': 4}
 
     #set multi, prefix
     cache.set_multi({'3':3, '4': 4}, prefix='p_')
-    assert(cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4})
-    assert(cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4})
+    assert cache.get_multi(('3', 4), prefix='p_') == {'3':3, 4: 4}
+    assert cache.get_multi(('p_3', 'p_4')) == {'p_3':3, 'p_4': 4}
 
     #incr
     cache.set('5', 1)
     cache.set('6', 1)
     cache.incr('5')
-    assert(cache.get('5'), 2)
+    assert cache.get('5') == 2
     cache.incr('5',2)
-    assert(cache.get('5'), 4)
+    assert cache.get('5') == 4
     cache.incr_multi(('5', '6'), 1)
-    assert(cache.get('5'), 5)
-    assert(cache.get('6'), 2)
+    assert cache.get('5') == 5
+    assert cache.get('6') == 2
 
 # a cache that occasionally dumps itself to be used for long-running
 # processes
diff --git a/r2/r2/lib/db/thing.py b/r2/r2/lib/db/thing.py
index 1f325a522..f20dca7e2 100644
--- a/r2/r2/lib/db/thing.py
+++ b/r2/r2/lib/db/thing.py
@@ -775,24 +775,40 @@ class Query(object):
         if self._stats_collector:
             self._stats_collector.add(self)
 
-        lst = None
-        if self._read_cache:
-            names = cache.get(self._iden())
-            if names is not None:
-                lst = Thing._by_fullname(names, data = self._data, return_dict = False)
+        def _retrieve():
+            return self._cursor().fetchall()
 
-        if lst is None:
-            #hit the db
-            lst = self._cursor().fetchall()
+        names = lst = []
+
+        if not self._read_cache:
+            lst = _retrieve()
         else:
-            used_cache = True
+            names = cache.get(self._iden())
+            if names is None and not self._write_cache:
+                # it wasn't in the cache, and we're not going to
+                # replace it, so just hit the db
+                lst = _retrieve()
+            elif names is None and self._write_cache:
+                # it's not in the cache, and we have the power to
+                # update it, which we should do in a lock to prevent
+                # concurrent requests for the same data
+                with g.make_lock("lock_%s" % self._iden()):
+                    # see if it was set while we were waiting for our
+                    # lock
+                    names = cache.get(self._iden(), local = False)
+                    if not names:
+                        lst = _retrieve()
+                        cache.set(self._iden(),
+                                  [ x._fullname for x in lst ],
+                                  self._cache_time)
 
-        if self._write_cache and not used_cache:
-            names = tuple(i._fullname for i in lst)
-            cache.set(self._iden(), names, self._cache_time)
+        if names and not lst:
+            # we got our list of names from the cache, so we need to
+            # turn them back into Things
+            lst = Thing._by_fullname(names, data = self._data, return_dict = False)
 
-        for i in lst:
-            yield i
+        for item in lst:
+            yield item
 
 class Things(Query):
     def __init__(self, kind, *rules, **kw):
diff --git a/r2/r2/models/subreddit.py b/r2/r2/models/subreddit.py
index 1d3c19c88..018aca1f7 100644
--- a/r2/r2/models/subreddit.py
+++ b/r2/r2/models/subreddit.py
@@ -276,9 +276,8 @@ class Subreddit(Thing, Printable):
            by popularity"""
         pop_reddits = Subreddit._query(Subreddit.c.type == ('public', 'restricted'),
-                                       # Subreddit.c.allow_top == True,
                                        sort=desc('_downs'),
-                                       limit = limit,
+                                       limit = int(limit * 1.5) if limit else None,
                                        data = True,
                                        read_cache = True,
                                        write_cache = True,
@@ -289,7 +288,14 @@ class Subreddit(Thing, Printable):
         if not c.over18:
             pop_reddits._filter(Subreddit.c.over_18 == False)
 
-        return list(pop_reddits)
+        # evaluate the query and remove the ones with
+        # allow_top == False. Note that because this filtering is done
+        # after the query is run, if there are a lot of top reddits
+        # with allow_top == False, we may return fewer than `limit`
+        # results.
+        srs = filter(lambda sr: sr.allow_top, pop_reddits)
+
+        return srs[:limit] if limit else srs
 
     @classmethod
     def default_subreddits(cls, ids = True, limit = g.num_default_reddits):
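
Note on the local = False read that the lock path in thing.py relies on:
the re-check inside the lock must bypass the in-process LocalCache,
because the value it is looking for would have been written to memcached
by another process while we waited, and the local layer cannot have seen
that write. A minimal sketch of the chain behaviour, with LocalCache and
Memcache as illustrative dict stand-ins rather than the real classes
(the real chain also back-fills earlier caches on a hit, omitted here):

    class LocalCache(dict):
        pass

    class Memcache(dict):
        pass

    class CacheChain(object):
        def __init__(self, *caches):
            self.caches = caches

        def get(self, key, default = None, local = True):
            for c in self.caches:
                # when local is False, skip the per-process layer and
                # go straight to the shared caches
                if not local and isinstance(c, LocalCache):
                    continue
                val = c.get(key)
                if val is not None:
                    return val
            return default

    chain = CacheChain(LocalCache(), Memcache())
    chain.caches[0]['k'] = 'stale local value'
    chain.caches[1]['k'] = 'written by another process'
    assert chain.get('k') == 'stale local value'
    assert chain.get('k', local = False) == 'written by another process'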