fix popular reddits, add new incremental query fn

This commit is contained in:
steve
2008-06-20 14:26:36 -04:00
parent 28504ac4f4
commit ef807ec94f
7 changed files with 57 additions and 51 deletions

View File

@@ -19,17 +19,27 @@
# All portions of the code written by CondeNet are Copyright (c) 2006-2008
# CondeNet, Inc. All Rights Reserved.
################################################################################
-from r2.models import Link
+from r2.models import Link, Subreddit
from r2.lib import utils

#stubs
def incr_counts(wrapped):
    pass

-def get_counts(period = '12 hours'):
+def get_link_counts(period = '12 hours'):
    links = Link._query(Link.c._date >= utils.timeago(period),
                        limit=50, data = True)
    return dict((l._fullname, (0, l.sr_id)) for l in links)
+def get_sr_counts(period = '12 hours'):
+    srs = Subreddit._query()
+    return dict((sr._fullname, 0) for sr in srs)
+
+def clear_sr_counts(names):
+    pass
+
try:
    from r2admin.lib.count import *
except ImportError:
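These are only stubs; the real counters are imported from r2admin.lib.count when that package is present. As a rough illustration of the contract the new callers rely on (Vote.vote bumps a per-subreddit counter through incr_counts, and run() in the popular-reddits job reads it back with get_sr_counts and resets it with clear_sr_counts), a minimal in-memory sketch follows. The dictionary store, the treatment of incr_counts as taking a list of subreddits, and the unused period argument are assumptions made for illustration, not the r2admin implementation:

#hypothetical in-memory stand-in for the private r2admin counters (illustration only)
_sr_vote_counts = {}

def incr_counts(srs):
    #assumed subreddit path only: Vote.vote() passes [sr] for each new valid link vote
    for sr in srs:
        _sr_vote_counts[sr._fullname] = _sr_vote_counts.get(sr._fullname, 0) + 1

def get_sr_counts(period = '12 hours'):
    #run() expects {subreddit fullname: count}; period is ignored in this sketch
    return dict(_sr_vote_counts)

def clear_sr_counts(names):
    #drop counters that run() has already folded into sr._downs
    for name in names:
        _sr_vote_counts.pop(name, None)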

View File

@@ -41,7 +41,7 @@ def keep_link(link):
def cached_organic_links(username):
    user = Account._by_name(username)
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
    srs = Subreddit.user_subreddits(user)
    link_names = filter(lambda n: sr_count[n][1] in srs, sr_count.keys())
    link_names.sort(key = lambda n: sr_count[n][0])
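For orientation, get_link_counts returns a dict keyed by link fullname whose values are (count, sr_id) tuples, so sr_count[n][1] above is the link's subreddit id (checked against the user's subscriptions) and sr_count[n][0] is the count used for sorting. A hypothetical return value (the fullnames, counts and ids are made up; the open-source stub always reports a count of 0):

{'t3_1a2b': (12, 42),   #12 counted events for a link in subreddit 42
 't3_7x9z': (5, 17)}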

View File

@@ -29,7 +29,7 @@ from datetime import datetime
cache = g.cache
def calc_rising():
-    sr_count = count.get_counts()
+    sr_count = count.get_link_counts()
    link_count = dict((k, v[0]) for k,v in sr_count.iteritems())
    link_names = Link._by_fullname(sr_count.keys(), data=True)

View File

@@ -21,45 +21,15 @@
################################################################################
from r2.models import Subreddit
from r2.lib.db.operators import desc
-# def pop_reddits():
-#     from r2.lib import count
-#     counts = count.get_counts()
-#     num_views = {}
-#     for num, sr in counts.values():
-#         info = num_views.setdefault(sr, [0, 0, 0])
-#         info[0] += num
-#         info[1] += 1
-#         info[2] = info[0] / info[1]
-#     pop = num_views.items()
-#     pop.sort(key = lambda x: x[1][2], reverse = True)
-#     return [i[0] for i in pop[:30]]
+from r2.lib import count

-def all_srs():
-    #can't use > 0 yet cause we'd have to cast, which requires some
-    #changes to tdb_sql
-    limit = 100
-    q = Subreddit._query(Subreddit.c.valid_votes != 0,
-                         limit = limit,
-                         sort = desc('_date'),
-                         data = True)
-    srs = list(q)
-    while srs:
-        for sr in srs:
-            yield sr
-        srs = list(q._after(sr)) if len(srs) == limit else None

-def update_sr(sr):
-    count = sr.valid_votes
-    if count != sr._downs and count > 0:
-        sr._downs = count
-        sr._commit()
-        sr._incr('valid_votes', -count)
-    elif count < 0:
-        #just in case
-        sr.valid_votes = 0
-        sr._commit()
def run():
-    for sr in all_srs():
-        update_sr(sr)
+    sr_counts = count.get_sr_counts()
+    names = [k for k, v in sr_counts.iteritems() if v != 0]
+
+    srs = Subreddit._by_fullname(names)
+    for name in names:
+        sr,c = srs[name], sr_counts[name]
+        if c != sr._downs and c > 0:
+            sr._downs = max(c, 0)
+            sr._commit()
+
+    count.clear_sr_counts(names)

View File

@@ -383,7 +383,7 @@ def reindex_all(types = None, delete_all_first=False):
timeago("50 years"),
start_t):
r = tokenize_things(batch)
count += len(r)
print ("Processing %s #%d(%s): %s"
% (cls.__name__, count, q.qsize(), r[0]['contents']))

View File

@@ -22,9 +22,10 @@
from urllib import unquote_plus, quote_plus, urlopen, urlencode
from urlparse import urlparse, urlunparse
from threading import local
+from copy import deepcopy
import cPickle as pickle
-import sha
-import re, datetime, math, random, string
+import re, datetime, math, random, string, sha
from pylons.i18n import ungettext, _
@@ -579,7 +580,6 @@ def fetch_things(t_class,since,until,batch_fn=None,
"""
from r2.lib.db.operators import asc
from copy import deepcopy
if not batch_fn:
batch_fn = lambda x: x
@@ -607,6 +607,30 @@ def fetch_things(t_class,since,until,batch_fn=None,
        q._after(t)
        things = list(q)

+def fetch_things2(query, chunk_size = 100, batch_fn = None):
+    """Incrementally run query with a limit of chunk_size until there are
+    no results left. batch_fn transforms the results for each chunk
+    before returning."""
+    orig_rules = deepcopy(query._rules)
+    query._limit = chunk_size
+    items = list(query)
+    done = False
+    while items and not done:
+        #don't need to query again at the bottom if we didn't get enough
+        if len(items) < chunk_size:
+            done = True
+        if batch_fn:
+            items = batch_fn(items)
+        for i in items:
+            yield i
+        if not done:
+            query._rules = deepcopy(orig_rules)
+            query._after(i)
+            items = list(query)
+
def set_emptying_cache():
"""
The default thread-local cache is a regular dictionary, which
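For context, a hypothetical call site for the new incremental helper: fetch_things2 re-runs the query with a limit of chunk_size and yields items until a chunk comes back short, and batch_fn, if supplied, transforms each chunk before its items are yielded. The query below is an assumption modeled on the one in count.py:

from r2.lib.db.operators import desc
from r2.lib.utils import fetch_things2, timeago
from r2.models import Link

#hypothetical example: walk every link from the last week, 100 rows per query
q = Link._query(Link.c._date >= timeago('1 week'),
                sort = desc('_date'), data = True)
for link in fetch_things2(q, chunk_size = 100):
    print link._fullname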

View File

@@ -53,6 +53,8 @@ class Vote(MultiRelation('vote',
    @classmethod
    def vote(cls, sub, obj, dir, ip, spam = False, organic = False):
        from admintools import valid_user, valid_thing, update_score
+        from r2.lib.count import incr_counts
+
        sr = obj.subreddit_slow
        kind = obj.__class__.__name__.lower()
        karma = sub.karma(kind, sr)
@@ -105,9 +107,9 @@ class Vote(MultiRelation('vote',
            author.incr_karma(kind, sr, up_change - down_change)

        #update the sr's valid vote count
-        # if is_new and v.valid_thing and kind == 'link':
-        #     if sub._id != obj.author_id:
-        #         sr._incr('valid_votes', 1)
+        if is_new and v.valid_thing and kind == 'link':
+            if sub._id != obj.author_id:
+                incr_counts([sr])

        #expire the sr
        if kind == 'link' and v.valid_thing: