Mirror of https://github.com/reddit-archive/reddit.git (synced 2026-01-22 21:38:11 -05:00)
LabeledMulti: Age-normalized hot
Update normalized_hot to allow certain LabeledMultis to specify slightly different weighting between subreddits than the current algorithm does.

The current merge algorithm always results in an N-subreddit multi having its first N results be the #1 result from each of the individual subreddits; this is not always ideal for slow subreddits (e.g., /r/announcements and /r/blog). Age-weighting allows a LabeledMulti to scale those older posts further down the list, and lets them drop off entirely after a number of days.

The age-weighting will require a change to the use of sgm in normalized_hot prior to full deployment, as the calculated ehot values are no longer global amongst all users.
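To see why the current merge pins each subreddit's #1 post into the top N slots, here is a minimal sketch of the normalization and merge (hypothetical scores; the real tuples built in get_hot_tuples also carry a link fullname and timestamp):

```python
import heapq

def old_tuples(name, hots):
    # Hot scores are normalized by the subreddit's own #1 post, so the
    # top post always gets effective_hot == 1.0 regardless of its age.
    top = max(hots[0], 1.)
    return [(-(hot / top), -hot, "%s/#%d" % (name, i + 1))
            for i, hot in enumerate(hots)]

# Hypothetical per-subreddit hot scores, hottest first.
announcements = [950.0, 4.0]        # slow subreddit, one old megahit
funny = [800.0, 790.0, 780.0]       # fast subreddit, fresh posts

for neg_ehot, neg_hot, link in heapq.merge(
        old_tuples("announcements", announcements),
        old_tuples("funny", funny)):
    print("%-18s ehot=%.4f" % (link, -neg_ehot))
# The two #1 posts (ehot == 1.0) always occupy the first two slots,
# even though the announcements post may be weeks old.
```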
```diff
@@ -417,7 +417,8 @@ class HotController(ListingWithPromos):
             sr_ids = Subreddit.user_subreddits(c.user)
             return normalized_hot(sr_ids)
         elif isinstance(c.site, MultiReddit):
-            return normalized_hot(c.site.kept_sr_ids, obey_age_limit=False)
+            return normalized_hot(c.site.kept_sr_ids, obey_age_limit=False,
+                                  ageweight=c.site.normalized_age_weight)
         else:
             if c.site.sticky_fullname:
                 link_list = [c.site.sticky_fullname]
```
```diff
@@ -26,6 +26,7 @@ from datetime import datetime, timedelta
 
 from pylons import g
 
+from r2.config import feature
 from r2.lib.cache import sgm
 from r2.lib.db.queries import _get_links, CachedResults
 from r2.lib.db.sorts import epoch_seconds
@@ -35,38 +36,62 @@ MAX_PER_SUBREDDIT = 150
 MAX_LINKS = 1000
 
 
-def get_hot_tuples(sr_ids):
+def get_hot_tuples(sr_ids, ageweight=None):
     queries_by_sr_id = {sr_id: _get_links(sr_id, sort='hot', time='all')
                         for sr_id in sr_ids}
     CachedResults.fetch_multi(queries_by_sr_id.values())
     tuples_by_srid = {sr_id: [] for sr_id in sr_ids}
 
+    now_seconds = epoch_seconds(datetime.now(g.tz))
+
     for sr_id, q in queries_by_sr_id.iteritems():
         if not q.data:
             continue
 
-        link_name, hot, timestamp = q.data[0]
-        thot = max(hot, 1.)
-        tuples_by_srid[sr_id].append((-1., -hot, link_name, timestamp))
+        hot_factor = get_hot_factor(q.data[0], now_seconds, ageweight)
 
-        for link_name, hot, timestamp in q.data[1:MAX_PER_SUBREDDIT]:
-            ehot = hot / thot
+        for link_name, hot, timestamp in q.data[:MAX_PER_SUBREDDIT]:
+            effective_hot = hot / hot_factor
             # heapq.merge sorts from smallest to largest so we need to flip
             # ehot and hot to get the hottest links first
-            tuples_by_srid[sr_id].append((-ehot, -hot, link_name, timestamp))
+            tuples_by_srid[sr_id].append(
+                (-effective_hot, -hot, link_name, timestamp)
+            )
 
     return tuples_by_srid
 
 
-def normalized_hot(sr_ids, obey_age_limit=True):
+def get_hot_factor(qdata, now, ageweight):
+    """Return a "hot factor" score for a link's hot tuple.
+
+    Recalculate the item's hot score as if it had been submitted more
+    recently than it was. This will cause the `effective_hot` value in
+    get_hot_tuples to move older first items back.
+
+    ageweight should be a float from 0.0 - 1.0, which scales how far
+    between the original submission time and "now" to use as the base
+    for the new hot score. Smaller values will favor older #1 posts in
+    multireddits; larger values will drop older posts further in the
+    ranking (or possibly off the ranking entirely).
+
+    """
+    ageweight = float(ageweight or 0.0)
+    link_name, hot, timestamp = qdata
+    return max(hot + ((now - timestamp) * ageweight) / 45000.0, 1.0)
+
+
+def normalized_hot(sr_ids, obey_age_limit=True, ageweight=None):
     timer = g.stats.get_timer("normalized_hot")
     timer.start()
 
     if not sr_ids:
         return []
 
-    tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
-                         prefix='normalized_hot', time=g.page_cache_time)
+    if ageweight and feature.is_enabled("scaled_normalized_hot"):
+        tuples_by_srid = get_hot_tuples(sr_ids, ageweight=ageweight)
+    else:
+        tuples_by_srid = sgm(g.cache, sr_ids, miss_fn=get_hot_tuples,
+                             prefix='normalized_hot', time=g.page_cache_time)
 
     if obey_age_limit:
         cutoff = datetime.now(g.tz) - timedelta(days=g.HOT_PAGE_AGE)
```
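A quick worked example of the formula (inputs are hypothetical; 45000.0 is the same seconds divisor reddit's hot sort uses per unit of recency). For a #1 post with hot = 20.0 submitted 5 days ago (432000 seconds), ageweight = 0.5 gives a factor of 20.0 + (432000 × 0.5) / 45000 = 24.8, so its effective_hot drops from 1.0 to about 0.81:

```python
# Standalone replica of get_hot_factor for experimentation
# (hypothetical inputs; qdata is (link_name, hot, timestamp)).
def get_hot_factor(qdata, now, ageweight):
    ageweight = float(ageweight or 0.0)
    link_name, hot, timestamp = qdata
    return max(hot + ((now - timestamp) * ageweight) / 45000.0, 1.0)

now = 1400000000.0                             # "current" epoch seconds
old_top = ("t3_old", 20.0, now - 5 * 86400)    # #1 post, 5 days old

for ageweight in (0.0, 0.25, 0.5, 1.0):
    factor = get_hot_factor(old_top, now, ageweight)
    print("ageweight=%.2f factor=%.1f effective_hot=%.2f"
          % (ageweight, factor, old_top[1] / factor))
# ageweight=0.00 factor=20.0 effective_hot=1.00  (current behavior)
# ageweight=0.25 factor=22.4 effective_hot=0.89
# ageweight=0.50 factor=24.8 effective_hot=0.81
# ageweight=1.00 factor=29.6 effective_hot=0.68
```

With ageweight = 0.0 the factor collapses to the post's own hot score, reproducing the old normalize-by-#1 behavior; larger weights push the old post's effective_hot further below fresher posts from faster subreddits.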
```diff
@@ -1352,6 +1352,10 @@ class DefaultSR(_DefaultSR):
 class MultiReddit(FakeSubreddit):
     name = 'multi'
     header = ""
+    _defaults = dict(
+        FakeSubreddit._defaults,
+        normalized_age_weight=0.0,
+    )
 
     def __init__(self, path=None, srs=None):
         FakeSubreddit.__init__(self)
```
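The `_defaults` assignment uses the standard dict-merge idiom for extending a parent class's defaults without mutating them; a minimal illustration with hypothetical names:

```python
class Base(object):
    _defaults = dict(over18=False, lang="en")

class Multi(Base):
    # dict(parent, **extras) copies the parent's defaults and adds a
    # key, leaving Base._defaults untouched.
    _defaults = dict(Base._defaults, normalized_age_weight=0.0)

print(Multi._defaults)  # includes over18, lang, normalized_age_weight
print(Base._defaults)   # unchanged
```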
```diff
@@ -1542,6 +1546,9 @@ class LabeledMulti(tdb_cassandra.Thing, MultiReddit):
         "date": pycassa.system_manager.DATE_TYPE,
         },
     }
+    _float_props = (
+        "base_normalized_age_weight",
+    )
     _compare_with = tdb_cassandra.UTF8_TYPE
     _read_consistency_level = tdb_cassandra.CL.ONE
     _write_consistency_level = tdb_cassandra.CL.QUORUM
```
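The sgm caveat from the commit message is visible in normalized_hot above: sgm caches one tuple list per subreddit, shared across all users, but once ageweight varies per multi those cached effective_hot values are no longer globally valid, so weighted requests currently bypass the cache entirely (behind the scaled_normalized_hot feature flag). A hypothetical sketch of the kind of follow-up the message anticipates, folding the weight into the cache key (names and approach are assumptions, not part of this commit):

```python
# Hypothetical sketch: restore caching for age-weighted multis by
# making the cache prefix depend on the weight, so tuple lists
# computed with different ageweights never collide.
def weighted_miss_fn(ageweight):
    def miss_fn(sr_ids):
        return get_hot_tuples(sr_ids, ageweight=ageweight)
    return miss_fn

def normalized_hot_tuples_cached(sr_ids, ageweight=None):
    prefix = 'normalized_hot_%.2f' % (ageweight or 0.0)
    return sgm(g.cache, sr_ids, miss_fn=weighted_miss_fn(ageweight),
               prefix=prefix, time=g.page_cache_time)
```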