diff --git a/r2/r2/lib/count.py b/r2/r2/lib/count.py
index 913937125..220ed6ed9 100644
--- a/r2/r2/lib/count.py
+++ b/r2/r2/lib/count.py
@@ -19,17 +19,27 @@
 # All portions of the code written by CondeNet are Copyright (c) 2006-2008
 # CondeNet, Inc. All Rights Reserved.
 ################################################################################
-from r2.models import Link
+
+from r2.models import Link, Subreddit
 from r2.lib import utils
 
+#stubs
+
 def incr_counts(wrapped):
     pass
 
-def get_counts(period = '12 hours'):
+def get_link_counts(period = '12 hours'):
     links = Link._query(Link.c._date >= utils.timeago(period),
                         limit=50, data = True)
     return dict((l._fullname, (0, l.sr_id)) for l in links)
 
+def get_sr_counts(period = '12 hours'):
+    srs = Subreddit._query()
+    return dict((sr._fullname, 0) for sr in srs)
+
+def clear_sr_counts(names):
+    pass
+
 try:
     from r2admin.lib.count import *
 except ImportError:
diff --git a/r2/r2/lib/organic.py b/r2/r2/lib/organic.py
index d817d9557..9d19a17f9 100644
--- a/r2/r2/lib/organic.py
+++ b/r2/r2/lib/organic.py
@@ -41,7 +41,7 @@ def keep_link(link):
 
 def cached_organic_links(username):
     user = Account._by_name(username)
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
     srs = Subreddit.user_subreddits(user)
     link_names = filter(lambda n: sr_count[n][1] in srs, sr_count.keys())
     link_names.sort(key = lambda n: sr_count[n][0])
diff --git a/r2/r2/lib/rising.py b/r2/r2/lib/rising.py
index 58f87837b..605e26c42 100644
--- a/r2/r2/lib/rising.py
+++ b/r2/r2/lib/rising.py
@@ -29,7 +29,7 @@ from datetime import datetime
 cache = g.cache
 
 def calc_rising():
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
     link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
     link_names = Link._by_fullname(sr_count.keys(), data=True)
 
diff --git a/r2/r2/lib/set_reddit_pops.py b/r2/r2/lib/set_reddit_pops.py
index 9e228e5b8..de8c16922 100644
--- a/r2/r2/lib/set_reddit_pops.py
+++ b/r2/r2/lib/set_reddit_pops.py
@@ -21,45 +21,15 @@
 ################################################################################
 from r2.models import Subreddit
 from r2.lib.db.operators import desc
-
-# def pop_reddits():
-#     from r2.lib import count
-#     counts = count.get_counts()
-#     num_views = {}
-#     for num, sr in counts.values():
-#         info = num_views.setdefault(sr, [0, 0, 0])
-#         info[0] += num
-#         info[1] += 1
-#         info[2] = info[0] / info[1]
-#     pop = num_views.items()
-#     pop.sort(key = lambda x: x[1][2], reverse = True)
-#     return [i[0] for i in pop[:30]]
+from r2.lib import count
 
-def all_srs():
-    #can't use > 0 yet cause we'd have to cast, which requires some
-    #changes to tdb_sql
-    limit = 100
-    q = Subreddit._query(Subreddit.c.valid_votes != 0,
-                         limit = limit,
-                         sort = desc('_date'),
-                         data = True)
-    srs = list(q)
-    while srs:
-        for sr in srs:
-            yield sr
-        srs = list(q._after(sr)) if len(srs) == limit else None
-
-def update_sr(sr):
-    count = sr.valid_votes
-    if count != sr._downs and count > 0:
-        sr._downs = count
-        sr._commit()
-        sr._incr('valid_votes', -count)
-    elif count < 0:
-        #just in case
-        sr.valid_votes = 0
-        sr._commit()
-
 def run():
-    for sr in all_srs():
-        update_sr(sr)
+    sr_counts = count.get_sr_counts()
+    names = [k for k, v in sr_counts.iteritems() if v != 0]
+    srs = Subreddit._by_fullname(names)
+    for name in names:
+        sr,c = srs[name], sr_counts[name]
+        if c != sr._downs and c > 0:
+            sr._downs = max(c, 0)
+            sr._commit()
+    count.clear_sr_counts(names)
diff --git a/r2/r2/lib/solrsearch.py b/r2/r2/lib/solrsearch.py
index fc14653e9..59f0f5107 100644
--- a/r2/r2/lib/solrsearch.py
+++ b/r2/r2/lib/solrsearch.py
@@ -383,7 +383,7 @@ def reindex_all(types = None, delete_all_first=False):
                                timeago("50 years"),
                                start_t):
             r = tokenize_things(batch)
-
+            count += len(r)
             print ("Processing %s #%d(%s): %s"
                    % (cls.__name__, count, q.qsize(), r[0]['contents']))
 
diff --git a/r2/r2/lib/utils/utils.py b/r2/r2/lib/utils/utils.py
index f98d405cf..2cc322a29 100644
--- a/r2/r2/lib/utils/utils.py
+++ b/r2/r2/lib/utils/utils.py
@@ -22,9 +22,10 @@
 from urllib import unquote_plus, quote_plus, urlopen, urlencode
 from urlparse import urlparse, urlunparse
 from threading import local
+from copy import deepcopy
 
 import cPickle as pickle
-import sha
-import re, datetime, math, random, string
+import re, datetime, math, random, string, sha
+
 from pylons.i18n import ungettext, _
 
@@ -579,7 +580,6 @@ def fetch_things(t_class,since,until,batch_fn=None,
 
     """
    from r2.lib.db.operators import asc
-    from copy import deepcopy
 
    if not batch_fn:
        batch_fn = lambda x: x
@@ -607,6 +607,30 @@
             q._after(t)
             things = list(q)
 
+def fetch_things2(query, chunk_size = 100, batch_fn = None):
+    """Incrementally run query with a limit of chunk_size until there are
+    no results left. batch_fn transforms the results for each chunk
+    before returning."""
+    orig_rules = deepcopy(query._rules)
+    query._limit = chunk_size
+    items = list(query)
+    done = False
+    while items and not done:
+        #don't need to query again at the bottom if we didn't get enough
+        if len(items) < chunk_size:
+            done = True
+
+        if batch_fn:
+            items = batch_fn(items)
+
+        for i in items:
+            yield i
+
+        if not done:
+            query._rules = deepcopy(orig_rules)
+            query._after(i)
+            items = list(query)
+
 def set_emptying_cache():
     """
     The default thread-local cache is a regular dictionary, which
diff --git a/r2/r2/models/vote.py b/r2/r2/models/vote.py
index 42296af3d..e2ce4d21f 100644
--- a/r2/r2/models/vote.py
+++ b/r2/r2/models/vote.py
@@ -53,6 +53,8 @@ class Vote(MultiRelation('vote',
     @classmethod
     def vote(cls, sub, obj, dir, ip, spam = False, organic = False):
         from admintools import valid_user, valid_thing, update_score
+        from r2.lib.count import incr_counts
+
         sr = obj.subreddit_slow
         kind = obj.__class__.__name__.lower()
         karma = sub.karma(kind, sr)
@@ -105,9 +107,9 @@ class Vote(MultiRelation('vote',
             author.incr_karma(kind, sr, up_change - down_change)
 
         #update the sr's valid vote count
-#         if is_new and v.valid_thing and kind == 'link':
-#             if sub._id != obj.author_id:
-#                 sr._incr('valid_votes', 1)
+        if is_new and v.valid_thing and kind == 'link':
+            if sub._id != obj.author_id:
+                incr_counts([sr])
 
         #expire the sr
         if kind == 'link' and v.valid_thing:
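Note on the new generator in r2/r2/lib/utils/utils.py: fetch_things2() generalizes the chunked-iteration pattern that the removed all_srs() in set_reddit_pops.py hand-rolled — run the query with a small _limit, yield each chunk, then restore the original rules and _after() past the last item until a short chunk signals the end. Below is a minimal usage sketch, not part of the patch: it assumes a hypothetical call site, that fetch_things2 is re-exported by the r2.lib.utils package like the other helpers in utils.py, and it reuses the Subreddit._query / desc('_date') calls from the removed all_srs code.

    from r2.models import Subreddit
    from r2.lib.db.operators import desc
    from r2.lib.utils import fetch_things2

    # walk every subreddit 100 rows at a time without holding the
    # whole result set in memory
    q = Subreddit._query(sort = desc('_date'), data = True)
    for sr in fetch_things2(q, chunk_size = 100):
        print sr._fullname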