* now caching one organic list per user again

* tracking for promoted links is now done via an authenticated hash to prevent replays (see the sketch below the commit metadata)
* organic list now properly ignores spam (even for moderators) and deleted links
* clicks and views on promoted links are now only tracked when they occur in the organic box or on the comments page
* organic boxes for users with lots of Subreddit subscriptions no longer pollute the cache
* properly validating URLs for promoted links (same rules as for regular submissions)
ketralnis
2008-12-01 14:11:25 -08:00
parent ff29f63326
commit e18163e832
17 changed files with 192 additions and 104 deletions
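
A rough sketch of the replay-resistant hash behind the tracking change. It mirrors the make_hash classmethod added to r2.lib.tracking later in this diff; hashlib stands in for the old sha module, and the secret and tracker URL are made-up placeholders for g.tracking_secret and g.adtracker_url.

import hashlib

TRACKING_SECRET = "example-secret"            # placeholder for g.tracking_secret
ADTRACKER_URL = "http://adtracker.example/"   # placeholder for g.adtracker_url

def make_hash(ip, fullname):
    # the hash binds the tracking URL to the viewer's IP and the promoted
    # link's fullname, so a captured URL can't be replayed from elsewhere
    data = "%s%s%s" % (ip, fullname, TRACKING_SECRET)
    return hashlib.sha1(data.encode("utf-8")).hexdigest()

def impression_url(ip, fullname):
    # same shape as PromotedLinkInfo.tracking_url() in the diff below
    return "%s?hash=%s&id=%s" % (ADTRACKER_URL, make_hash(ip, fullname), fullname)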

View File

@@ -49,6 +49,7 @@ from r2.config import cache
from r2.lib.jsonresponse import JsonResponse, Json
from r2.lib.jsontemplates import api_type
from r2.lib import cssfilter
from r2.lib import tracking
from r2.lib.media import force_thumbnail, thumbnail_url
from simplejson import dumps
@@ -56,8 +57,7 @@ from simplejson import dumps
from datetime import datetime, timedelta
from md5 import md5
from r2.lib.promote import promote, unpromote
from r2.controllers import promotecontroller
from r2.lib.promote import promote, unpromote, get_promoted
def link_listing_by_url(url, count = None):
try:
@@ -1316,9 +1316,10 @@ class ApiController(RedditController):
@Json
@validate(VSponsor(),
link = VLink('link_id'),
ValidDomain('url'),
l = VLink('link_id'),
title = VTitle('title'),
url = nop('url'),
url = VUrl(['url', 'sr']),
sr = VSubmitSR('sr'),
subscribers_only = VBoolean('subscribers_only'),
disable_comments = VBoolean('disable_comments'),
@@ -1330,7 +1331,18 @@ class ApiController(RedditController):
disable_comments,
timelimit = None, timelimitlength = None, timelimittype = None,
disable_expire = None,
link = None):
l = None):
res._update('status', innerHTML = '')
if isinstance(url, str):
# VUrl may have modified the URL to make it valid, like
# adding http://
res._update('url', value=url)
elif isinstance(url, tuple) and isinstance(url[0], Link):
# there's already one or more links with this URL, but
# we're allowing multiple submissions, so we really just
# want the URL
url = url[0].url
if res._chk_error(errors.NO_TITLE):
res._focus('title')
elif res._chk_errors((errors.NO_URL,errors.BAD_URL)):
@@ -1339,39 +1351,39 @@ class ApiController(RedditController):
res._focus('sr')
elif timelimit and res._chk_error(errors.BAD_NUMBER):
res._focus('timelimitlength')
elif link:
link.title = title
link.url = url
elif l:
l.title = title
l.url = url
link.promoted_subscribersonly = subscribers_only
link.disable_comments = disable_comments
l.promoted_subscribersonly = subscribers_only
l.disable_comments = disable_comments
if disable_expire:
link.promote_until = None
l.promote_until = None
elif timelimit and timelimitlength and timelimittype:
link.promote_until = timefromnow("%d %s" % (timelimitlength, timelimittype))
l.promote_until = timefromnow("%d %s" % (timelimitlength, timelimittype))
link._commit()
l._commit()
res._redirect('/promote/edit_promo/%s' % to36(link._id))
res._redirect('/promote/edit_promo/%s' % to36(l._id))
else:
link = Link(title = title,
url = url,
author_id = c.user._id,
sr_id = sr._id)
l = Link(title = title,
url = url,
author_id = c.user._id,
sr_id = sr._id)
if timelimit and timelimitlength and timelimittype:
promote_until = timefromnow("%d %s" % (timelimitlength, timelimittype))
else:
promote_until = None
link._commit()
l._commit()
promote(link, subscribers_only = subscribers_only,
promote(l, subscribers_only = subscribers_only,
promote_until = promote_until,
disable_comments = disable_comments)
res._redirect('/promote/edit_promo/%s' % to36(link._id))
res._redirect('/promote/edit_promo/%s' % to36(l._id))
def GET_link_thumb(self, *a, **kw):
"""
@@ -1397,3 +1409,29 @@ class ApiController(RedditController):
return UploadedImage(_('saved'), thumbnail_url(link), "upload",
errors = errors).render()
@Json
@validate(ids = nop("ids"))
def POST_onload(self, res, ids, *a, **kw):
ids = set(ids.split(','))
promoted = set(get_promoted())
promoted = ids.intersection(promoted)
if promoted:
links = {}
promoted = Link._by_fullname(promoted, data = True, return_dict = False)
for l in promoted:
links[l._fullname] = [
tracking.PromotedLinkInfo.gen_url(fullname=l._fullname,
ip = request.ip),
tracking.PromotedLinkClickInfo.gen_url(fullname = l._fullname,
dest = l.url,
ip = request.ip)
]
res.object = links
else:
res.object = {}
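
For illustration only, the res.object payload built above maps each promoted fullname to its impression-pixel URL and its click-redirect URL; the fullname, hash placeholder, and hosts here are invented.

# hypothetical shape of res.object as consumed by the client-side
# handleOnLoad()/Thing.show() code further down in this commit
example_onload_response = {
    "t3_abcde": [
        "http://adtracker.example/?hash=<sha1-of-ip-fullname-secret>&id=t3_abcde",
        "http://clicktracker.example/?hash=<sha1-of-ip-fullname-secret>&id=t3_abcde&url=http%3A%2F%2Fexample.com%2F",
    ],
}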

View File

@@ -201,7 +201,7 @@ class HotController(FixListing, ListingController):
if len(o.things) > 0:
# only pass through a listing if the links made it
# through the builder in organic_links *and* ours
# through our builder
organic.update_pos(pos+1, calculation_key)
return o

View File

@@ -38,7 +38,7 @@ class PromoteController(RedditController):
def GET_current_promos(self):
current_list = promote.get_promoted()
b = IDBuilder([ x._fullname for x in current_list])
b = IDBuilder(current_list)
render_list = b.get_items()[0]

View File

@@ -230,9 +230,7 @@ class SelfEmptyingCache(LocalCache):
def maybe_reset(self):
if len(self) > self.max_size:
print "SelfEmptyingCache clearing!"
self.clear()
print "Cleared (%d)" % len(self)
def set(self,key,val,time = 0):
self.maybe_reset()

View File

@@ -32,6 +32,7 @@ import random
from time import time
organic_lifetime = 5*60
organic_length = 30
# how many regular organic links should show between promoted ones
promoted_every_n = 5
@@ -40,7 +41,9 @@ def keep_link(link):
return not any((link.likes != None,
link.saved,
link.clicked,
link.hidden))
link.hidden,
link._deleted,
link._spam))
def insert_promoted(link_names, sr_ids, logged_in):
"""
@@ -58,38 +61,46 @@ def insert_promoted(link_names, sr_ids, logged_in):
else:
return keep_link(l)
# remove any that the user has acted on
builder = IDBuilder([ x._fullname for x in promoted_items ],
skip = True, keep_fn = my_keepfn)
promoted_items = builder.get_items()[0]
# no point in running the builder over more promoted links than
# we'll even use
max_promoted = max(1,len(link_names)/promoted_every_n)
# in the future, we may want to weight this sorting somehow
random.shuffle(promoted_items)
# remove any that the user has acted on
builder = IDBuilder(promoted_items,
skip = True, keep_fn = my_keepfn,
num = max_promoted)
promoted_items = builder.get_items()[0]
if not promoted_items:
return
# don't insert one at the head of the list 50% of the time for
# logged in users, and 50% of the time for logged-off users when
# the pool of promoted links is less than 3 (to avoid showing the
# same promoted link to the same person too often)
if c.user_is_loggedin or len(promoted_items) < 3:
skip_first = random.choice((True,False))
else:
skip_first = False
if logged_in or len(promoted_items) < 3:
promoted_items.insert(0, None)
# insert one promoted item for every N items
for i, item in enumerate(promoted_items):
pos = i * promoted_every_n
if pos > len(link_names):
break
elif pos == 0 and skip_first:
elif item is None:
continue
else:
link_names.insert(pos, promoted_items[i]._fullname)
@memoize('cached_organic_links', time = organic_lifetime)
def cached_organic_links(sr_ids, logged_in):
def cached_organic_links(user_id, langs):
if user_id is None:
sr_ids = Subreddit.default_srs(langs, ids = True)
else:
user = Account._byID(user_id, data=True)
sr_ids = Subreddit.user_subreddits(user)
sr_count = count.get_link_counts()
#only use links from reddits that you're subscribed to
@@ -107,26 +118,33 @@ def cached_organic_links(sr_ids, logged_in):
new_item = random.choice(items[1:4])
link_names.insert(0, new_item._fullname)
insert_promoted(link_names, sr_ids, logged_in)
builder = IDBuilder(link_names, num = 30, skip = True, keep_fn = keep_link)
links = builder.get_items()[0]
# remove any that the user has acted on
builder = IDBuilder(link_names,
skip = True, keep_fn = keep_link,
num = organic_length)
link_names = [ x._fullname for x in builder.get_items()[0] ]
calculation_key = str(time())
update_pos(0, calculation_key)
# in case of duplicates (inserted by the random up-and-coming link
# or a promoted link), return only the first
ret = [l._fullname for l in UniqueIterator(links)]
insert_promoted(link_names, sr_ids, user_id is not None)
# remove any duplicates caused by insert_promoted
ret = [ l for l in UniqueIterator(link_names) ]
return (calculation_key, ret)
def organic_links(user):
from r2.controllers.reddit_base import organic_pos
sr_ids = Subreddit.user_subreddits(user, limit = None)
# make sure that these are sorted so the cache keys are constant
sr_ids.sort()
sr_ids = Subreddit.user_subreddits(user)
cached_key, links = cached_organic_links(sr_ids, c.user_is_loggedin)
if c.user_is_loggedin:
cached_key, links = cached_organic_links(user._id, None)
else:
cached_key, links = cached_organic_links(None, c.content_langs)
cookie_key, pos = organic_pos()
# pos will be 0 if it wasn't specified
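
A minimal sketch of the new cache keying, assuming a plain dict in place of r2's @memoize decorator (expiry omitted); build_organic_list is a hypothetical stand-in for the listing logic in the hunk above. The point is that the memo key is now the small (user_id, langs) pair rather than a potentially huge sorted list of subreddit ids, so one entry is cached per user, or per language set for logged-out visitors.

_organic_cache = {}

def cached_organic_links(user_id, langs, build_organic_list):
    # logged-in users are keyed by account id; logged-out users share an
    # entry per language set (langs must be hashable, e.g. a tuple)
    key = ("cached_organic_links", user_id, langs)
    if key not in _organic_cache:
        _organic_cache[key] = build_organic_list(user_id, langs)
    return _organic_cache[key]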

View File

@@ -83,8 +83,7 @@ def get_promoted_cached():
return [ x._fullname for x in links if x not in expired_links ]
def get_promoted():
fullnames = get_promoted_cached()
return Link._by_fullname(fullnames, data = True, return_dict = False)
return get_promoted_cached()
def promote_builder_wrapper(alternative_wrapper):
def wrapper(thing):

View File

@@ -25,6 +25,7 @@ from Crypto.Cipher import AES
from random import choice
from pylons import g, c
from urllib import quote_plus, unquote_plus
import sha
key_len = 16
pad_len = 32
@@ -86,11 +87,11 @@ def safe_str(text):
class Info(object):
'''Class for generating and reading user tracker information.'''
__slots__ = []
_tracked = []
tracker_url = ""
def __init__(self, text = '', **kw):
for s in self.__slots__:
for s in self._tracked:
setattr(self, s, '')
if text:
@@ -100,8 +101,8 @@ class Info(object):
g.log.error("decryption failure on '%s'" % text)
data = []
for i, d in enumerate(data):
if i < len(self.__slots__):
setattr(self, self.__slots__[i], d)
if i < len(self._tracked):
setattr(self, self._tracked[i], d)
else:
self.init_defaults(**kw)
@@ -109,7 +110,7 @@ class Info(object):
raise NotImplementedError
def tracking_url(self):
data = '|'.join(getattr(self, s) for s in self.__slots__)
data = '|'.join(getattr(self, s) for s in self._tracked)
data = encrypt(data)
return "%s?v=%s" % (self.tracker_url, data)
@@ -131,7 +132,7 @@ class Info(object):
class UserInfo(Info):
'''Class for generating and reading user tracker information.'''
__slots__ = ['name', 'site', 'lang']
_tracked = ['name', 'site', 'lang']
tracker_url = g.tracker_url
def init_defaults(self):
@@ -140,13 +141,28 @@ class UserInfo(Info):
self.lang = safe_str(c.lang if c.lang else '')
class PromotedLinkInfo(Info):
__slots__ = ['fullname']
_tracked = []
tracker_url = g.adtracker_url
def __init__(self, text = "", ip = "0.0.0.0", **kw):
self.ip = ip
Info.__init__(self, text = text, **kw)
def init_defaults(self, fullname):
self.fullname = fullname
@classmethod
def make_hash(cls, ip, fullname):
return sha.new("%s%s%s" % (ip, fullname,
g.tracking_secret)).hexdigest()
def tracking_url(self):
return (self.tracker_url + "?hash=" +
self.make_hash(self.ip, self.fullname)
+ "&id=" + self.fullname)
class PromotedLinkClickInfo(PromotedLinkInfo):
_tracked = []
tracker_url = g.clicktracker_url
def init_defaults(self, dest, **kw):
@@ -155,8 +171,7 @@ class PromotedLinkClickInfo(PromotedLinkInfo):
return PromotedLinkInfo.init_defaults(self, **kw)
def tracking_url(self):
s = PromotedLinkInfo.tracking_url(self) + '&url=' + self.dest
g.log.debug("generated %s" % s)
s = (PromotedLinkInfo.tracking_url(self) + '&url=' + self.dest)
return s
def benchmark(n = 10000):
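
The ad and click trackers themselves are separate services and not part of this diff; presumably they reject replays by recomputing the hash from the requesting IP, along the lines of this hypothetical check.

import hashlib

def tracker_accepts(request_ip, fullname, presented_hash, secret):
    # recompute sha1(ip + fullname + secret) and compare; a URL captured
    # by a different client fails because its IP no longer matches
    expected = hashlib.sha1(
        ("%s%s%s" % (request_ip, fullname, secret)).encode("utf-8")).hexdigest()
    return expected == presented_hash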

View File

@@ -238,7 +238,6 @@ class Link(Thing, Printable):
from r2.lib.count import incr_counts
from r2.lib.media import thumbnail_url
from r2.lib.utils import timeago
from r2.lib.tracking import PromotedLinkClickInfo
saved = Link._saved(user, wrapped) if user else {}
hidden = Link._hidden(user, wrapped) if user else {}
@@ -291,17 +290,6 @@ class Link(Thing, Printable):
item.nofollow = True
else:
item.nofollow = False
if item.promoted and g.clicktracker_url:
# promoted links' clicks are tracked, so here we are
# changing its URL to be the tracking/redirecting URL.
# This can't be done in PromotedLink.add_props because
# we still want to track clicks for links that haven't
# been wrapped as PromotedLinks, as in regular
# listings
item.url = PromotedLinkClickInfo.gen_url(fullname = item._fullname,
dest = item.url)
if c.user_is_loggedin:
incr_counts(wrapped)

View File

@@ -51,6 +51,7 @@ class Subreddit(Thing, Printable):
show_media = False,
domain = None,
)
sr_limit = 50
@classmethod
def _new(self, name, title, lang = 'en', type = 'public',
@@ -290,7 +291,7 @@ class Subreddit(Thing, Printable):
return [s._id for s in pop_reddits] if ids else list(pop_reddits)
@classmethod
def user_subreddits(cls, user, limit = 50):
def user_subreddits(cls, user, limit = sr_limit):
"""subreddits that appear in a user's listings. returns the default
srs if there are no subscriptions."""
if user and user.has_subscribed:

View File

@@ -2,6 +2,7 @@ function Thing(id) {
this.__init__(id);
};
var reddit_thing_info = {fetch: []};
Thing.prototype = {
__init__: function(id) {
this._id = id;
@@ -102,6 +103,18 @@ Thing.prototype = {
else {
show(this.row);
}
/* promoted magic */
if(reddit_thing_info && reddit_thing_info[this._id]) {
var img = this.$("promote_img");
var img_src = unsafe(reddit_thing_info[this._id][0]);
var new_url = unsafe(reddit_thing_info[this._id][1]);
if (img && img.src != img_src) {
img.src = img_src;
this.$("title").href = new_url;
if(this.$("thumbnail"))
this.$("thumbnail").href = new_url;
}
}
},
del: function(do_animate) {

View File

@@ -132,14 +132,6 @@ OrganicListing.prototype.change = function(dir) {
c.fade("veryfast");
add_to_aniframes(function() {
c.hide();
if(n.$('promoted')) {
var i = new Image();
i.src = n.$('promoted').tracking_url.value;
// just setting i.src is enough to most browsers to
// download, but in Opera it's not, we'd need to
// uncomment this line to get the image in the DOM
/* n.$('promoted').appendChild(i); */
}
n.show();
n.set_opacity(0);
}, 1);

Binary file not shown (after: 110 B).

View File

@@ -590,16 +590,27 @@ before enabling */
border: none;
}
.organic-help-button { padding: 0 .5ex; }
.sponsored-tagline {
.organic-listing .sponsored-tagline {
color: #808080;
bottom: 0;
margin: 0 5px 5px 0;
position: absolute;
right: 6.2em;
right: 6.3em;
display: block;
}
.sponsored-tagline {
display: none;
}
.promote-pixel {
position: absolute;
top: -1000px;
right: -1000px;
}
.organic-help-button { padding: 0 .5ex; }
.infobar {
background-color: #f6e69f;
padding: 5px 10px;

View File

@@ -67,10 +67,29 @@ function init() {
/*updateMods();*/
}
stc = $("siteTable_comments");
/* onload populates reddit_link_info, so checking its contents
* ensures it doesn't get called on reload/refresh */
if ( reddit_thing_info.fetch && reddit_thing_info.fetch.length != 0 )
redditRequest("onload", {ids: reddit_thing_info.fetch.join(",")},
handleOnLoad);
update_reddit_count();
}
function handleOnLoad(r) {
r = parse_response(r);
var f = reddit_thing_info.fetch;
reddit_thing_info = (r && r.response) ? r.response.object : {};
for (var i = 0; i < f.length; i++) {
var l = new Link(f[i]);
if (l.row && l.row.style.display != "none")
l.show();
}
reddit_thing_info.fetch = [];
}
function deletetoggle(link, type) {
var parent = link.parentNode;
var form = parent.parentNode;

View File

@@ -23,7 +23,6 @@
<%!
from r2.models.subreddit import Default
from r2.lib.template_helpers import get_domain
from r2.lib import tracking
%>
<%inherit file="printable.html"/>
@@ -81,23 +80,6 @@
${self.admintagline()}
</ul>
${self.mediadiv()}
%if thing.promoted:
<%
tracking_url = tracking.PromotedLinkInfo.gen_url(fullname=thing._fullname)
%>
<form id="promoted_${thing._fullname}" method="GET"
action="about:blank" style="display: none;">
<input type="hidden" name="tracking_url" value="${tracking_url}" />
</form>
<script type="text/javascript">
/* see also OrganicListing.prototype.change()
dirty, dirty hack. But some people are into that */
if(!"$display") {
var i = new Image();
i.src = "${tracking_url}";
}
</script>
%endif
</%def>
<%def name="subreddit()" buffered="True">

View File

@@ -23,16 +23,23 @@
<%namespace file="help.html" import="help_or_hide"/>
<%
from r2.lib.template_helpers import replace_render, static
from r2.lib.promote import get_promoted
%>
<div id="siteTable_organic" class="organic-listing">
<%
promoted = set(get_promoted())
promoted = [o for o in thing.org_links if o in promoted]
%>
<div id="ajaxHook_organic" class="ajaxhook"> </div>
<script type="text/javascript">
## fortunately, python arrays print in JS-compatible form.
var organic_links = ${thing.org_links};
reddit_thing_info.fetch = ${promoted};
</script>
%for a in thing.things:
${unsafe(replace_render(thing, a, display = (thing.visible_link == a._fullname)))}
${unsafe(replace_render(thing, a,
display = (thing.visible_link == a._fullname)))}
%endfor
<div class="nextprev">

View File

@@ -34,4 +34,11 @@
<p class="sponsored-tagline">
${_('sponsored link')} |
</p>
</%def>
<img class="promote-pixel"
id="promote_img_${thing._fullname}"
src="/static/pixel.png" alt=" " />
<script type="text/javascript">
reddit_thing_info.fetch.push("${thing._fullname}");
</script>
</%def>