fix popular reddits, add new incremental query fn

This commit is contained in:
steve
2008-06-20 14:26:36 -04:00
parent 28504ac4f4
commit ef807ec94f
7 changed files with 57 additions and 51 deletions

View File

@@ -19,17 +19,27 @@
# All portions of the code written by CondeNet are Copyright (c) 2006-2008
# CondeNet, Inc. All Rights Reserved.
################################################################################
-from r2.models import Link
+from r2.models import Link, Subreddit
from r2.lib import utils

#stubs
def incr_counts(wrapped):
    pass

-def get_counts(period = '12 hours'):
+def get_link_counts(period = '12 hours'):
    links = Link._query(Link.c._date >= utils.timeago(period),
                        limit=50, data = True)
    return dict((l._fullname, (0, l.sr_id)) for l in links)
+def get_sr_counts(period = '12 hours'):
+    srs = Subreddit._query()
+    return dict((sr._fullname, 0) for sr in srs)
+
+def clear_sr_counts(names):
+    pass
+
try:
    from r2admin.lib.count import *
except ImportError:
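These are only stubs; the real counters are imported from r2admin.lib.count when that package is present. As a rough illustration of the contract the new callers rely on (Vote.vote bumps a per-subreddit counter through incr_counts, and run() in the popular-reddits job reads it back with get_sr_counts and resets it with clear_sr_counts), a minimal in-memory sketch follows. The dictionary store, the treatment of incr_counts as taking a list of subreddits, and the unused period argument are assumptions made for illustration, not the r2admin implementation:

#hypothetical in-memory stand-in for the private r2admin counters (illustration only)
_sr_vote_counts = {}

def incr_counts(srs):
    #assumed subreddit path only: Vote.vote() passes [sr] for each new valid link vote
    for sr in srs:
        _sr_vote_counts[sr._fullname] = _sr_vote_counts.get(sr._fullname, 0) + 1

def get_sr_counts(period = '12 hours'):
    #run() expects {subreddit fullname: count}; period is ignored in this sketch
    return dict(_sr_vote_counts)

def clear_sr_counts(names):
    #drop counters that run() has already folded into sr._downs
    for name in names:
        _sr_vote_counts.pop(name, None)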

View File

@@ -41,7 +41,7 @@ def keep_link(link):
def cached_organic_links(username):
    user = Account._by_name(username)
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
    srs = Subreddit.user_subreddits(user)
    link_names = filter(lambda n: sr_count[n][1] in srs, sr_count.keys())
    link_names.sort(key = lambda n: sr_count[n][0])
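For orientation, get_link_counts returns a dict keyed by link fullname whose values are (count, sr_id) tuples, so sr_count[n][1] above is the link's subreddit id (checked against the user's subscriptions) and sr_count[n][0] is the count used for sorting. A hypothetical return value (the fullnames, counts and ids are made up; the open-source stub always reports a count of 0):

{'t3_1a2b': (12, 42),   #12 counted events for a link in subreddit 42
 't3_7x9z': (5, 17)}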

View File

@@ -29,7 +29,7 @@ from datetime import datetime
cache = g.cache
def calc_rising():
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

View File

@@ -21,45 +21,15 @@
################################################################################
from r2.models import Subreddit
from r2.lib.db.operators import desc
-# def pop_reddits():
-#     from r2.lib import count
-#     counts = count.get_counts()
-#     num_views = {}
-#     for num, sr in counts.values():
-#         info = num_views.setdefault(sr, [0, 0, 0])
-#         info[0] += num
-#         info[1] += 1
-#         info[2] = info[0] / info[1]
-#     pop = num_views.items()
-#     pop.sort(key = lambda x: x[1][2], reverse = True)
-#     return [i[0] for i in pop[:30]]
+from r2.lib import count

-def all_srs():
-    #can't use > 0 yet cause we'd have to cast, which requires some
-    #changes to tdb_sql
-    limit = 100
-    q = Subreddit._query(Subreddit.c.valid_votes != 0,
-                         limit = limit,
-                         sort = desc('_date'),
-                         data = True)
-    srs = list(q)
-    while srs:
-        for sr in srs:
-            yield sr
-        srs = list(q._after(sr)) if len(srs) == limit else None

-def update_sr(sr):
-    count = sr.valid_votes
-    if count != sr._downs and count > 0:
-        sr._downs = count
-        sr._commit()
-        sr._incr('valid_votes', -count)
-    elif count < 0:
-        #just in case
-        sr.valid_votes = 0
-        sr._commit()
def run():
-    for sr in all_srs():
-        update_sr(sr)
+    sr_counts = count.get_sr_counts()
+    names = [k for k, v in sr_counts.iteritems() if v != 0]
+
+    srs = Subreddit._by_fullname(names)
+    for name in names:
+        sr,c = srs[name], sr_counts[name]
+        if c != sr._downs and c > 0:
+            sr._downs = max(c, 0)
+            sr._commit()
+
+    count.clear_sr_counts(names)

View File

@@ -383,7 +383,7 @@ def reindex_all(types = None, delete_all_first=False):
timeago("50 years"),
start_t):
r = tokenize_things(batch)
count += len(r)
print ("Processing %s #%d(%s): %s"
% (cls.__name__, count, q.qsize(), r[0]['contents']))

View File

@@ -22,9 +22,10 @@
from urllib import unquote_plus, quote_plus, urlopen, urlencode
from urlparse import urlparse, urlunparse
from threading import local
+from copy import deepcopy
import cPickle as pickle
-import sha
-import re, datetime, math, random, string
+import re, datetime, math, random, string, sha
from pylons.i18n import ungettext, _
@@ -579,7 +580,6 @@ def fetch_things(t_class,since,until,batch_fn=None,
"""
from r2.lib.db.operators import asc
from copy import deepcopy
if not batch_fn:
batch_fn = lambda x: x
@@ -607,6 +607,30 @@ def fetch_things(t_class,since,until,batch_fn=None,
        q._after(t)
        things = list(q)

+def fetch_things2(query, chunk_size = 100, batch_fn = None):
+    """Incrementally run query with a limit of chunk_size until there are
+    no results left. batch_fn transforms the results for each chunk
+    before returning."""
+    orig_rules = deepcopy(query._rules)
+    query._limit = chunk_size
+    items = list(query)
+    done = False
+    while items and not done:
+        #don't need to query again at the bottom if we didn't get enough
+        if len(items) < chunk_size:
+            done = True
+        if batch_fn:
+            items = batch_fn(items)
+        for i in items:
+            yield i
+        if not done:
+            query._rules = deepcopy(orig_rules)
+            query._after(i)
+            items = list(query)
+
def set_emptying_cache():
"""
The default thread-local cache is a regular dictionary, which
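For context, a hypothetical call site for the new incremental helper: fetch_things2 re-runs the query with a limit of chunk_size and yields items until a chunk comes back short, and batch_fn, if supplied, transforms each chunk before its items are yielded. The query below is an assumption modeled on the one in count.py:

from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2, timeago
from r2.models import Link

#hypothetical example: walk every link from the last week, 100 rows per query
q = Link._query(Link.c._date >= timeago('1 week'),
                sort = desc('_date'), data = True)
for link in fetch_things2(q, chunk_size = 100):
    print link._fullname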

View File

@@ -53,6 +53,8 @@ class Vote(MultiRelation('vote',
    @classmethod
    def vote(cls, sub, obj, dir, ip, spam = False, organic = False):
        from admintools import valid_user, valid_thing, update_score
+        from r2.lib.count import incr_counts
+
        sr = obj.subreddit_slow
        kind = obj.__class__.__name__.lower()
        karma = sub.karma(kind, sr)
@@ -105,9 +107,9 @@ class Vote(MultiRelation('vote',
            author.incr_karma(kind, sr, up_change - down_change)

        #update the sr's valid vote count
-        # if is_new and v.valid_thing and kind == 'link':
-        #     if sub._id != obj.author_id:
-        #         sr._incr('valid_votes', 1)
+        if is_new and v.valid_thing and kind == 'link':
+            if sub._id != obj.author_id:
+                incr_counts([sr])

        #expire the sr
        if kind == 'link' and v.valid_thing: